def __init__(self):
     """Set up the source root path, the tree helpers and the default report."""
     project_parts = ('PycharmProjects', 'python-vistalytics', 'source')
     # Root of the project sources, located under the user's home directory.
     self.root_path = join(expanduser('~'), *project_parts)
     self.tree = Tree()
     self.tree_str = TreeStructure()
     self.file_utils = FileUtils()
     # Income statements are the report type selected until changed.
     self.report_choice = con.INCOME_STATEMENTS
 def _read_csv(self, resource_dir):
     src_dir_path = join(self.root_path, 'in', 'csv', resource_dir)
     files = FileUtils().get_files(src_dir_path)
     data_list = []
     for f in files:
         data_list.append(FileUtils().read_csv(src_dir_path, f))
     return data_list
Example #3
0
def cleanup_temp_files():
    """Delete temporary files from the scratch directory unless preserved.

    Reads the module-level ``args`` and ``log``. Nothing happens when
    ``args.preserve_temp_files`` is set.
    """
    global args, log
    if args.preserve_temp_files:
        return
    track = MKVTrack(log)
    file_helper = FileUtils(log, args)
    # Only files with an extension reported by the track helper are removed.
    file_helper.delete_temp_files(args.scratch_dir,
                                  track.get_possible_extensions())
 def __init__(self):
     """Set up the source root path, key/sheet name lists and helper objects."""
     project_parts = ("PycharmProjects", "python-vistalytics", "source")
     self.root_path = join(expanduser("~"), *project_parts)
     # presumably section headings to skip while processing; verify against users of ignore_keys
     self.ignore_keys = [
         'Margins % of Sales',
         'Profitability',
         'Cash Flow Ratios',
         'Balance Sheet Items (in %)',
         'Liquidity/Financial Health',
         'Efficiency',
     ]
     self.sheet_list = [
         'Income_Statement',
         'Balance_Sheet',
         'Cash_Flow_Statement',
         'Key_Ratios',
     ]
     self.math_utils = MathUtils()
     self.file_utils = FileUtils()
class CurrentQuarterAnalysis(object):
    """Report statement rows where exactly one of the last two values is zero.

    The user picks a report type, the matching tree is built through
    ``TreeStructure``, the CSV files for that report are read and every row
    whose key is part of the tree is checked.
    """

    def __init__(self):
        """Set up the source root path, tree helpers and the default report."""
        self.root_path = join(expanduser('~'), 'PycharmProjects',
                              'python-vistalytics', 'source')
        self.tree = Tree()
        self.tree_str = TreeStructure()
        self.file_utils = FileUtils()
        self.report_choice = con.INCOME_STATEMENTS

    def _read_csv(self, resource_dir):
        """Read every CSV file under ``<root_path>/in/csv/<resource_dir>``.

        :param resource_dir: name of the sub-directory below ``in/csv``.
        :return: list with one ``read_csv`` result per listed file.
        """
        src_dir_path = join(self.root_path, 'in', 'csv', resource_dir)
        return [
            self.file_utils.read_csv(src_dir_path, f)
            for f in self.file_utils.get_files(src_dir_path)
        ]

    def _compare(self, v1, v2):
        """Return True when exactly one of ``v1`` and ``v2`` equals zero."""
        return bool((v1 == 0) != (v2 == 0))

    def process_data(self, data_list):
        """Print every tree row whose last two values differ in being zero.

        :param data_list: iterable of data frames; a ``con.TTM`` column, when
            present, is dropped from the frame in place before the check.
        """
        for data in data_list:
            if con.TTM in data.columns:
                # Name the axis explicitly: pandas 2 no longer accepts it
                # as a positional argument.
                data.drop(con.TTM, axis=1, inplace=True)
            for i, v in data.iterrows():
                if self.tree_str.get_string(i) not in self.tree:
                    continue
                size_val = len(v)
                # Positional access; the row is indexed by column labels.
                previous = v.iloc[size_val - 2]
                latest = v.iloc[size_val - 1]
                if self._compare(previous, latest):
                    print("#############################################")
                    print("Index: " + str(i) + " " + str(previous) +
                          " : " + str(latest))

    def run(self):
        """Ask for a report type, build its tree and process its CSV files.

        Input that is not an integer, or not a key of
        ``tree_str.report_choices``, falls back to the income statement.
        """
        print(
            "Choices :\n\n\tDefault: Income Statement\n\t1: Balance Statement\n\t2: Cash Flow Statement"
        )
        c = input()
        try:
            self.report_choice = self.tree_str.report_choices[int(c)]
            # Compare by value; identity of constants is not guaranteed.
            if self.report_choice == con.BALANCE_STATEMENTS:
                self.tree_str.get_balance_tree()
            else:
                self.tree_str.get_cash_flow_tree()
        except (KeyError, ValueError):
            self.report_choice = con.INCOME_STATEMENTS
            self.tree_str.get_income_tree()
        self.tree = self.tree_str.tree
        data_list = self._read_csv(self.report_choice)
        print(self.tree)
        self.process_data(data_list)
Example #6
0
 def download_photos(self, filepath, results):
     """Download every photo listed in ``results`` next to ``filepath``.

     Each photo is written to ``filepath + <index> + "." + <extension>``,
     where the extension is the text after the last dot of the link.

     :param filepath: path prefix the photo index and extension are
         appended to.
     :param results: mapping that may hold a ``"photos"`` iterable of links.
     """
     # dict.has_key() does not exist in Python 3; "in" works everywhere.
     if "photos" not in results:
         print("[*] No photos to download.")
         return
     for i, photo_link in enumerate(results["photos"]):
         res = HTTPUtils(photo_link).make_request()
         ext = photo_link.rsplit(".", 1)[-1]
         fu = FileUtils(filepath + str(i) + "." + ext)
         fu.write_binary(res.content)
Example #7
0
 def completar_info_basica(self, img, fecha):
     """Fill in the basic-information step of the form.

     :param img: value handed to ``FileUtils.seleccionar_img_gui``
         (presumably the image to select; verify against that helper).
     :param fecha: text written into the ``#fecha_certificado`` field.
     """
     # NOTE(review): fixed waits; presumably the page needs time to load.
     time.sleep(5)
     # Set the field through jQuery in the page. ``fecha`` is concatenated
     # into the script unescaped, so a double quote in it breaks the call.
     self.driver.execute_script('$("#fecha_certificado").val("' + fecha +
                                '")')
     time.sleep(5)
     # Click the add-image control, then let the helper pick the image.
     self.find_element(self.__locators.AGREGAR_IMG_BTN).click()
     FileUtils.seleccionar_img_gui(img)
     self.find_element(self.__locators.TERMIN_CONDIC_INP).click()
     self.find_element(self.__locators.ACEPTAR_BTN).click()
     try:
         # A second accept button may not show up; a timeout is ignored.
         self.find_element(self.__locators.ACEPTAR_ADV_BTN).click()
     except TimeoutException:
         pass
Example #8
0
    def run(self):
        """Run the search and store the data of every new result on disk.

        When ``self.cf`` is set nothing is done (reading from a file is not
        implemented). Otherwise the search page is requested and parsed into
        links, and each link is fetched, parsed and written to its own
        result directory until a link equal to the stored last result is met.

        :return: the parsed data of the last link that was fetched, or
            ``None`` when no link was parsed.
        """
        results = None
        if self.cf:
            print("[-] Still not implemented. Could not read from file.")
        else:
            try:
                url, params = self.build_request()
                hu = HTTPUtils(url, params=params)
                html = hu.make_request()
                links = ParsingEngine.parse_search(html.content, self.engine)
                if not links:
                    print("[-] Search did not return results. Exiting...")
                    # SystemExit is not an Exception subclass, so the outer
                    # handler below does not swallow it.
                    exit()
                # Check whether a search with these params was made for this engine
                filepath = self.check_or_create_params_dir()
                filepath += "/"
                search_filepath = filepath
                # Look up the last result stored by an earlier search
                last_result = self.check_last_result(filepath)
                # Check whether a search with these params was already done today
                filepath = self.check_or_create_today_dir(filepath)
                filepath += "/"
                # Counts the results saved during this run.
                i = 0
                for link in links:
                    try:
                        # Stop at the first link already seen by a prior search.
                        if last_result == link:
                            print("[ * Result from prior searches found * ]")
                            break
                        hu = HTTPUtils(link)
                        html = hu.make_request()
                        results = ParsingEngine.parse_result(html.content, self.engine)
                        if not results:
                            print("[-] No data retrieved for result: %s" % link)
                            continue
                        lines = self.build_text(results, link)
                        f, e = self.check_or_create_res_dir(filepath, link, i)
                        f += "/"
                        if e: # result already saved
                            continue
                        self.download_photos(f, results)
                        fu = FileUtils(f + "info.txt")
                        # Encode with the response encoding when one is known.
                        if html.encoding:
                            fu.write(lines.encode(html.encoding))
                        else:
                            fu.write(lines)
                        i += 1
                    except Exception as e:
                        # One failing result must not abort the whole search:
                        # report it with file name and line number, then go on.
                        print("[-] Search result: %s could not be processed: %s" % (link, str(e)) )
                        import sys, os
                        exc_type, exc_obj, exc_tb = sys.exc_info()
                        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                        print(exc_type, fname, exc_tb.tb_lineno)
                # Remember the first link so the next run knows where to stop.
                if links and links[0]:
                    fu = FileUtils(search_filepath + "last_result")
                    fu.write(links[0])
                print("[+] %s results processed" % str(i))
            except Exception as e:
                print("[-] Something went wrong: %s" % e)

        return results
def run():
    """Train a linear SVM spam classifier and classify a sample e-mail.

    Loads the vocabulary and a sample e-mail, prints its feature summary,
    trains on ``spamTrain.mat``, prints training and test accuracy and
    finally prints the prediction for ``spamSample1.txt``.
    """
    file_path = FileUtils.get_abs_path(__file__, "./data/emailSample1.txt")
    vocab_path = FileUtils.get_abs_path(__file__, "./data/vocab.txt")

    # Context managers close the files instead of leaking the handles.
    with open(file_path, "r") as email_file:
        file_contents = email_file.read()
    with open(vocab_path, "r") as vocab_file:
        vocab_lines = vocab_file.read().splitlines()

    # Each vocabulary line is "<value>\t<key>". Blank lines are skipped, so
    # a missing trailing newline no longer drops the last entry.
    vocabList_d = {}
    for entry in vocab_lines:
        if not entry:
            continue
        value, key = entry.split("\t")
        vocabList_d[key] = value

    print(file_contents)

    word_indices = process_email(file_contents, vocabList_d)
    features = email_features(word_indices, vocabList_d)
    print("Length of feature vector: ", len(features))
    print("Number of non-zero entries: ", np.sum(features))

    spam_mat_path = FileUtils.get_abs_path(__file__, "./data/spamTrain.mat")
    spam_mat = loadmat(spam_mat_path)
    X_train = spam_mat["X"]
    y_train = spam_mat["y"]

    C = 0.1
    spam_svc = SVC(C=C, kernel="linear")
    spam_svc.fit(X_train, y_train.ravel())
    print("Training Accuracy:",
          (spam_svc.score(X_train, y_train.ravel())) * 100, "%")

    spam_mat_test_path = FileUtils.get_abs_path(__file__,
                                                "./data/spamTest.mat")
    spam_mat_test = loadmat(spam_mat_test_path)
    X_test = spam_mat_test["Xtest"]
    y_test = spam_mat_test["ytest"]

    print("Test Accuracy:", (spam_svc.score(X_test, y_test.ravel())) * 100,
          "%")

    file_path = FileUtils.get_abs_path(__file__, "./data/spamSample1.txt")
    with open(file_path, "r") as sample_file:
        file_contents = sample_file.read()

    word_indices = process_email(file_contents, vocabList_d)
    features = email_features(word_indices, vocabList_d)
    # One sample row; the width follows the feature vector instead of a
    # hard-coded vocabulary size.
    features = features.reshape(1, -1)

    print(spam_svc.predict(features))
    print('1 is spam, 0 is not spam')
Example #10
0
def run():
    """Print the training-set accuracy of the pre-trained neural network."""
    weights = loadmat(
        FileUtils.get_abs_path(__file__, "./data/ex3weights.mat"))
    Theta1, Theta2 = weights['Theta1'], weights['Theta2']

    np.set_printoptions(suppress=True)
    samples = loadmat(FileUtils.get_abs_path(__file__, "./data/ex3data1.mat"))
    X, y = samples["X"], samples["y"]

    predictions = predict_nn(Theta1, Theta2, X)
    # Share of predictions equal to the labels, as a percentage.
    accuracy = np.mean(predictions == y) * 100
    print("Accuracy on training set with Neural Network:", accuracy)
Example #11
0
 def run(self):
     """Start the crawler once for every city of the local city file."""
     # Log: start reading the local region file.
     logger.info('开始获取读取本地地区文件')
     city_data = FileUtils.get_city()
     # Log: start fetching merchant review data.
     logger.info('开始获取商家评论数据')
     for city in city_data['city'].values():
         self.crawler_start(city['code'])
Example #12
0
def run():
    """Fit logistic regression on ``ex2data1.txt`` and report the results.

    Prints cost and gradient for two fixed parameter vectors, minimises the
    cost with TNC, prints the optimal parameters and the training accuracy
    and plots the decision boundary.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex2data1.txt")
    data = np.loadtxt(data_path, delimiter=',')
    n = np.size(data, 1)
    m = np.size(data, 0)
    # The last column is the label, all columns before it are features.
    x = data[:, :n - 1].reshape(m, n - 1)
    y = data[:, n - 1].reshape(m, 1)
    # Prepend the intercept column.
    x = np.hstack([np.ones([m, 1]), x])
    theta = np.zeros([n, 1])
    cost, grad = cost_function(theta, x, y)

    print("Cost with theta [0;0;0]: ", cost)
    print('Theta Result with [0;0;0]:\n', grad)

    test_theta = str2arr('[-24; 0.2; 0.2]')
    cost, grad = cost_function(test_theta, x, y)

    print("Cost with theta [-24; 0.2; 0.2]: ", cost)
    print('Theta Result with [-24; 0.2; 0.2]:\n', grad)

    # minimize() expects a one-dimensional start vector; recent SciPy
    # releases reject the (n, 1) column used above.
    Result = op.minimize(fun=cost_function,
                         x0=theta.ravel(),
                         args=(x, y),
                         method='TNC',
                         jac=True)
    optimal_theta = Result.x
    print('Optimal theta: ', optimal_theta)

    res = predict(optimal_theta, x)
    print("Accuracy:", np.mean(((res == y).flatten())) * 100)
    plot_decision_boundary(optimal_theta, x, y)
Example #13
0
def run():
    """Show random digit samples, check the regularised cost and train one-vs-all.

    Displays a 10x10 grid of random rows of ``X`` as 20x20 images, prints
    cost and gradients for a small test case next to the expected values,
    then trains the one-vs-all classifier and prints its training accuracy.
    """
    np.set_printoptions(suppress=True)

    data_path = FileUtils.get_abs_path(__file__, "./data/ex3data1.mat")
    mat = loadmat(data_path)
    X = mat["X"]
    y = mat["y"]
    fig, axis = plt.subplots(10, 10, figsize=(12, 12))
    # randint's upper bound is exclusive, so the row count keeps every
    # drawn index valid; a fixed bound one past it could index out of range.
    n_samples = X.shape[0]
    for i in range(10):
        for j in range(10):
            sample = X[np.random.randint(0, n_samples), :]
            # reshape back to 20 pixel by 20 pixel
            axis[i, j].imshow(sample.reshape(20, 20, order="F"), cmap="hot")
            axis[i, j].axis("off")
    plt.show()

    theta_t = str2arr('[-2; -1; 1; 2]')
    X_t = np.array([np.linspace(0.1, 1.5, 15)]).reshape(3, 5).T
    X_t = np.hstack((np.ones((5, 1)), X_t))
    y_t = (str2arr('[1;0;1;0;1]'))
    lambda_t = 3
    cost, grad = cost_function_regularized(theta_t, X_t, y_t, lambda_t)

    print("Cost:", cost, "Expected cost: 2.534819")
    print(
        "Gradients:\n", grad,
        "\nExpected gradients:\n 0.146561\n -0.548558\n 0.724722\n 1.398003")

    lambda_value = 0.1
    num_labels = 10
    all_theta = one_vs_all(X, y, num_labels, lambda_value)
    res = predict_one_vs_all(all_theta, X)
    print("Accuracy on training set with OneVsAll:", np.mean((res == y)) * 100)
Example #14
0
    def get_vector(user_comments_full_path, most_frequent_words):
        """Build the feature vector of one user from that user's comments.

        :param user_comments_full_path: path to all comments of one user
        :param most_frequent_words: iterable of the most frequent words
        :return: the basic feature values followed by one frequency per word
            of ``most_frequent_words`` (0 for words the user never used)
        """
        comments = FileUtils.get_list_of_comments(user_comments_full_path)
        frequency_by_word = dict(
            FeatureExtractor.get_word_list_frequency(comments))
        basic_features = FeatureExtractor.get_basic_features(comments)

        word_features = [
            frequency_by_word.get(word, 0) for word in most_frequent_words
        ]
        return basic_features + word_features
class TestRunner:
    """Validate sample files of a directory against ``merged-syntax.xsd``."""

    # Shared validator, created once when the class is defined.
    validator = Validator(
        os.path.join(FileUtils.get_schemadir(), "merged-syntax.xsd"))

    @staticmethod
    def testOK(mapping_sample, case_prefix):
        """Check that every ``<case_prefix>_ok*`` file validates.

        :param mapping_sample: directory holding the sample files.
        :param case_prefix: prefix identifying the test case.
        :return: False at the first file reported invalid, True otherwise.
        """
        candidates = os.listdir(mapping_sample)
        wanted_prefix = case_prefix + "_ok"

        for sample_file in candidates:
            if not sample_file.startswith(wanted_prefix):
                continue
            file_path = os.path.join(mapping_sample, sample_file)
            outcome = TestRunner.validator.validate_file(file_path,
                                                         verbose=False)
            if outcome is False:
                # Validate again verbosely so the reason gets reported.
                TestRunner.validator.validate_file(file_path, verbose=True)
                logger.error(sample_file + " is not valid, it should be")
                return False
            logger.info(sample_file + " is valid: fine")
        return True

    @staticmethod
    def testKO(mapping_sample, case_prefix):
        """Check that no ``<case_prefix>_ko*`` file validates.

        :param mapping_sample: directory holding the sample files.
        :param case_prefix: prefix identifying the test case.
        :return: False at the first file reported valid, True otherwise.
        """
        wanted_prefix = case_prefix + "_ko"
        candidates = os.listdir(mapping_sample)

        for sample_file in candidates:
            if not sample_file.startswith(wanted_prefix):
                continue
            file_path = os.path.join(mapping_sample, sample_file)
            outcome = TestRunner.validator.validate_file(file_path,
                                                         verbose=False)
            if outcome is True:
                logger.error(sample_file)
                return False
            logger.info(sample_file + " is not valid: fine")
        return True
Example #16
0
class QuarterlyAverageChangeAnalysis(object):
    """Write a year-over-year change report for every quarterly input file."""

    def __init__(self):
        """Set up the source root path and the math/file helpers."""
        self.root_path = join(expanduser('~'), "PycharmProjects",
                              "python-vistalytics", "source")
        self.math_utils = MathUtils()
        self.file_utils = FileUtils()

    def run(self):
        """Compare, per row, the latest column with the one four columns back.

        Every file under ``<root_path>/in/quarter`` is read, a ``TTM``
        column is dropped when present, and the absolute and percentage
        change are written to
        ``<root_path>/out/<file name>_Quarterly_Report.csv``.
        """
        path = join(self.root_path, "in", 'quarter')
        for f in self.file_utils.get_files(path):
            key_list = []
            change = []
            percent_change = []
            data = self.file_utils.read_csv(path, f)

            if 'TTM' in data:
                # Name the axis explicitly: pandas 2 no longer accepts it
                # as a positional argument.
                data = data.drop('TTM', axis=1)

            for i, v in data.iterrows():
                # Prepare Columns
                key_list.append(i)
                size_val = len(v)
                # Positional access; the row is indexed by column labels.
                earlier = v.iloc[size_val - 5]
                latest = v.iloc[size_val - 1]
                change.append(self.math_utils.get_change(earlier, latest))
                percent_change.append(
                    self.math_utils.percentage_change(earlier, latest))

            # Write csv.
            column_names = [
                'Keys', 'Year Over Year Change', 'Year Over Year Change (%)'
            ]
            output_data = pd.DataFrame(
                {
                    'Keys': key_list,
                    'Year Over Year Change': change,
                    'Year Over Year Change (%)': percent_change,
                },
                columns=column_names)

            output_path = join(
                self.root_path, 'out',
                splitext(f)[0].replace(" ", "_") + "_Quarterly_Report.csv")
            self.file_utils.write_csv(output_path, output_data)
def plot_data():
    """Scatter-plot the two columns of ``ex1data1.txt`` with red crosses.

    The first column is plotted on the x axis and the second on the y axis.
    The figure is not shown here; the caller is expected to do that.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex1data1.txt")
    data = np.loadtxt(data_path, delimiter=',')
    x = data[:, 0]
    y = data[:, 1]
    # "red" is a colour, not a colormap name, so it belongs in color=.
    plt.scatter(x, y, marker='x', color='red')
    plt.xlabel("Population of City in 10,000s")
    plt.ylabel('Profit in $10,000s')
Example #18
0
def load_project_config():
    """Load ``config/config.ini`` from the project directory.

    :return: the result of ``FileUtils.load_config`` when the file exists,
        otherwise ``None``. Any error while checking or loading is printed
        and terminates the process with exit status 1.
    """
    config_path = os.sep.join([PROJECT_DIR, "config", "config.ini"])
    try:
        if not os.path.exists(config_path):
            return None
        return FileUtils.load_config(config_path)
    except Exception as err:
        print("Error loading config file: ", err)
        sys.exit(1)
Example #19
0
def run():
    """Fit and plot SVM classifiers on the three ex6 data sets.

    Data set 1 uses a linear kernel, data set 2 an RBF kernel with a fixed
    gamma, and data set 3 uses the parameters chosen by
    ``dataset_3_params``. The value of ``gaussian_kernel`` for a fixed pair
    of vectors is printed in between.
    """

    def load_dataset(relative_path):
        # Resolve the path relative to this file and load the .mat content.
        return loadmat(FileUtils.get_abs_path(__file__, relative_path))

    first = load_dataset("./data/ex6data1.mat")
    X, y = first["X"], first["y"]

    plot_data(X, y)

    linear_clf = SVC(C=1, kernel="linear")
    linear_clf.fit(X, np.ravel(y))

    plot_svc(linear_clf, X)

    vec_a = np.array([1, 2, 1])
    vec_b = np.array([0, 4, -1])
    sigma = 2

    print(gaussian_kernel(vec_a, vec_b, sigma))

    second = load_dataset("./data/ex6data2.mat")

    y2 = second['y']
    X2 = second['X']

    plot_data(X2, y2)

    rbf_clf = SVC(kernel='rbf', gamma=30)
    rbf_clf.fit(X2, y2.ravel())
    plot_svc(rbf_clf, X2)

    third = load_dataset("./data/ex6data3.mat")
    X3, y3 = third["X"], third["y"]
    Xval, yval = third["Xval"], third["yval"]

    plot_data(X3, y3)
    best_c, best_gamma = dataset_3_params(X3, y3, Xval, yval)
    tuned_clf = SVC(C=best_c, gamma=best_gamma)
    tuned_clf.fit(X3, y3.ravel())
    plot_svc(tuned_clf, X3)
Example #20
0
class YearlyAverageChangeAnalysis(object):
    """Write a 3- and 5-column change report for every annual input file."""

    def __init__(self):
        """Set up the source root path and the file/math helpers."""
        self.root_path = join(expanduser("~"), "PycharmProjects", "python-vistalytics", "source")
        self.file_utils = FileUtils()
        self.math_utils = MathUtils()

    def run(self):
        """Compute average and percentage change over the last 3 and 5 columns.

        Every file under ``<root_path>/in/annual`` is read, a ``TTM`` column
        is dropped when present, and the per-row results are written to
        ``<root_path>/out/<file name>_Annual_Report.csv``.
        """
        src_dir_path = join(self.root_path, "in", "annual")

        column_names = ['Keys',
                        'Average Change Over Last 3 Years',
                        'Average Change Over Last 3 Years (%)',
                        'Average Change Over Last 5 Years',
                        'Average Change Over Last 5 Years (%)']

        for f in self.file_utils.get_files(src_dir_path):
            key_list = []
            avg_change_3_years_list = []
            percentage_change_3_years_list = []
            avg_change_5_years_list = []
            percentage_change_5_years_list = []
            data = self.file_utils.read_csv(src_dir_path, f)
            if 'TTM' in data:
                # Name the axis explicitly: pandas 2 no longer accepts it
                # as a positional argument.
                data = data.drop('TTM', axis=1)
            for i, v in data.iterrows():
                # Prepare columns.
                size_val = len(v)
                # Positional access; the row is indexed by column labels.
                latest = v.iloc[size_val - 1]
                key_list.append(i)
                avg_change_3_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 3:]))
                percentage_change_3_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 3], latest))
                avg_change_5_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 5:]))
                percentage_change_5_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 5], latest))

            # Write to csv.
            output_data = {'Keys': key_list,
                           'Average Change Over Last 3 Years': avg_change_3_years_list,
                           'Average Change Over Last 3 Years (%)': percentage_change_3_years_list,
                           'Average Change Over Last 5 Years': avg_change_5_years_list,
                           'Average Change Over Last 5 Years (%)': percentage_change_5_years_list}
            output_data_frame = pd.DataFrame(output_data, columns=column_names)
            output_file_path = join(self.root_path, "out",
                                    splitext(f)[0].replace(" ", "_") + "_Annual_Report.csv")
            self.file_utils.write_csv(output_file_path, output_data_frame)
Example #21
0
def run():
    """Run k-means on the 2-D sample data, then on the pixels of an image.

    The first part prints the closest centroids and the recomputed
    centroids for fixed start values and plots a run from random start
    values. The second part clusters the pixels of ``bird_small.png`` into
    16 colours and shows the original next to the recoloured image.
    """
    points = loadmat(FileUtils.get_abs_path(__file__,
                                            "./data/ex7data2.mat"))["X"]
    cluster_count = 3
    start_centroids = np.array([[3, 3], [6, 2], [8, 5]])
    assignment = find_closest_centroids(points, start_centroids)
    print("Closest centroids for the first 3 examples:\n", assignment[0:3])

    centroids = compute_centroids(points, assignment, cluster_count)
    print("Centroids computed after initial finding of closest centroids:\n",
          centroids)
    _rows, _cols = points.shape[0], points.shape[1]
    start_centroids = init_random_centroid(points, cluster_count)
    assignment = find_closest_centroids(points, start_centroids)
    plot_kmeans(points, start_centroids, assignment, cluster_count, 10)
    plt.show()

    image = plt.imread(
        FileUtils.get_abs_path(__file__, "./data/bird_small.png"))
    image /= 255
    height, width, rgb = image.shape
    pixels = image.reshape(height * width, 3)

    colour_count = 16
    num_iters = 10
    pixel_start = init_random_centroid(pixels, colour_count)
    palette, pixel_assignment = run_kmeans(pixels, pixel_start, num_iters,
                                           colour_count)

    # Replace every pixel by the centroid it was assigned to.
    recovered = palette[pixel_assignment, :].reshape(image.shape)

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))
    original_axis, compressed_axis = ax[0], ax[1]
    original_axis.imshow(image * 255)
    original_axis.set_title('Original')
    original_axis.grid(False)

    # Display compressed image, rescale back by 255
    compressed_axis.imshow(recovered * 255)
    compressed_axis.set_title('Compressed, with %d colors' % colour_count)
    compressed_axis.grid(False)

    plt.show()
Example #22
0
def run():
    """Fit univariate linear regression on ``ex1data1.txt`` and plot it.

    Prints the initial cost, the parameters found by gradient descent and
    the cost for a fixed parameter vector, then draws the fitted line, a
    contour plot and a surface plot of the cost over a parameter grid.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex1data1.txt")
    data = np.loadtxt(data_path, delimiter=',')
    n = np.size(data, 1)
    m = np.size(data, 0)
    # The last column is the target, all columns before it are features.
    x = data[:, :n - 1].reshape(m, n - 1)
    y = data[:, n - 1].reshape(m, 1)
    # Prepend the intercept column.
    x = np.hstack([np.ones([m, 1]), x])

    theta = np.zeros([n, 1])
    alpha = 0.01
    iterations = 1500
    cost = cost_function_j(x, y, theta)
    print('Cost', cost)

    thetaRes, j_hist = gradient_descent(x, y, theta, alpha, iterations)
    print(thetaRes)

    cost = cost_function_j(x, y, str2arr('[-1;2]'))
    print(cost)

    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)
    # J_vals[i, j] is the cost at (theta0_vals[i], theta1_vals[j]).
    J_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
    for i, theta0 in enumerate(theta0_vals):
        for j, theta1 in enumerate(theta1_vals):
            t = np.vstack([theta0, theta1])
            J_vals[i, j] = cost_function_j(x, y, t)

    pltData.plot_data()
    plt.plot(x[:, 1], x @ thetaRes, '-', color='red')

    fig1 = plt.figure()
    ax = fig1.add_subplot(111)

    # contour() expects Z indexed as [y, x]; J_vals is indexed
    # [theta0, theta1], so it is transposed like in the surface plot below.
    ax.contour(theta0_vals, theta1_vals, J_vals.T, np.logspace(-2, 3, 20))

    fig2 = plt.figure()
    ax2 = fig2.add_subplot(111, projection='3d')
    theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)

    ax2.plot_surface(theta0_vals, theta1_vals, np.transpose(J_vals))
    plt.show()
Example #23
0
def run():
    """Detect outliers in ``ex8data1.mat`` with a Gaussian model.

    Plots the samples, estimates the Gaussian parameters, picks the
    threshold on the validation set, prints it with its F1 score and
    circles the samples whose probability is below the threshold.
    """
    dataset = loadmat(FileUtils.get_abs_path(__file__, "./data/ex8data1.mat"))
    X, Xval, yval = dataset["X"], dataset["Xval"], dataset["yval"]

    plt.scatter(X[:, 0], X[:, 1], marker="x")
    plt.xlim(0, 30)
    plt.ylim(0, 30)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Throughput (mb/s)")
    plt.show()

    mu, sigma2 = estimate_gaussian(X)

    train_density = multivariate_gaussian(X, mu, sigma2)

    visualize_fit(X, mu, sigma2)

    validation_density = multivariate_gaussian(Xval, mu, sigma2)
    epsilon, F1 = select_threshold(yval, validation_density)
    print("Best epsilon found using cross-validation:", epsilon)
    print("Best F1 on Cross Validation Set:", F1)

    # Row indices of the samples whose density is below the threshold.
    outlier_rows = np.nonzero(train_density < epsilon)[0]
    marker_style = dict(marker="o", facecolor="none", edgecolor="r", s=70)
    plt.scatter(X[outlier_rows, 0], X[outlier_rows, 1], **marker_style)
    plt.xlim(0, 35)
    plt.ylim(0, 35)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Throughput (mb/s)")

    plt.show()
    def get_most_frequent_word_betwenn_all_commenters(
            path, most_frequent_word_per_author):
        """Collect the top words of every commenter (user) into one set.

        :param path: directory holding one comments file per commenter
        :param most_frequent_word_per_author: number of top words taken from
            each commenter
        :return: set with the union of the per-commenter top words
        """
        collected = set()
        for comments_file in sorted(os.listdir(path)):
            # For each author, take the top words and add them to the set.
            comments = FileUtils.get_list_of_comments(
                os.sep.join([path, comments_file]))
            frequencies = WordBaseFeatureExtractor.get_word_list_frequency(
                comments)

            # Drop repeated words while keeping the first-seen order.
            unique_words = list(
                collections.OrderedDict.fromkeys(
                    entry[0] for entry in frequencies).keys())
            collected |= set(unique_words[:most_frequent_word_per_author])
        return collected
Example #25
0
def run():
    """Fit multivariate linear regression on ``ex1data2.txt`` in two ways.

    Gradient descent runs on the normalised features and the normal
    equation on the raw features. Both predict the price of a 1650 sq ft,
    3 bedroom house, and the gradient-descent cost history is plotted.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex1data2.txt")
    data = np.loadtxt(data_path, delimiter=',')
    n = np.size(data, 1)
    m = np.size(data, 0)
    # The last column is the target, all columns before it are features.
    x = data[:, :n - 1].reshape(m, n - 1)
    y = data[:, n - 1].reshape(m, 1)
    ones = np.ones([m, 1])
    X, mu, sigma = feature_normalize(x)
    # x keeps the raw features, X the normalised ones; both get the
    # intercept column.
    x = np.hstack([ones, x])
    X = np.hstack([ones, X])
    theta = np.zeros([n, 1])
    alpha = 0.01
    iterations = 400

    cost = cost_function_j(X, y, theta)
    print('Cost', cost)

    thetaRes, j_hist = gradient_descent(X, y, theta, alpha, iterations)
    print('Theta using gradient descent:\n', thetaRes)

    # Query the same house as the printed label and the normal-equation
    # check below: 1650 sq ft.
    print('Price of 1650 sq ft and 3 bedroom house: ',
          predict([[1650, 3]], thetaRes, mu, sigma))

    # One point per gradient-descent iteration.
    plt.plot(range(iterations), j_hist)
    plt.xlabel("No of iterations")
    plt.ylabel("Cost")
    plt.title("Gradient Descent")
    plt.show()

    thetaRes = normal_equation(x, y)
    print('Theta using normal equation: \n', thetaRes)

    price = thetaRes.T @ np.array([[1], [1650], [3]])
    print('Price of 1650 sq ft and 3 bedroom house: ', price[0][0])
Example #26
0
from utils.doregister import SignUp
from utils.doUpload import Upload
from utils.db_utils import DbUtils
from utils.tf_utils import TfUtils
from utils.imgur_utils import ImgurUtils
from utils.file_utils import FileUtils

#delete random lib after we add in classification
import random

# Create the Flask application object.
app = Flask(__name__)
# TODO: move all config related stuff to a separate app config class
# Earlier upload setup through the Upload helper, kept disabled:
#Upload = Upload()
#Upload.initialize_app_upload(app)
# Image storage is set up at import time: the upload set comes from
# FileUtils and is then registered together with the app.
fu = FileUtils()
photo_set = fu.get_upload_set()
fu.initialize_app_image_storage(app, photo_set)


@app.route('/')
@app.route('/home')
def home():
    """Render the home page for both ``/`` and ``/home``."""
    template_name = 'home.html'
    return render_template(template_name)


@app.route('/login', methods=['GET', 'POST'])
def login():
    if request.method == 'GET':
        if session.get('logged_in'):
            #TODO need to update session info here if person goes directly to profile so we have album_id
'''
Created on 2021/07/01

@author: laurentmichel
'''
import unittest
from utils.file_utils import FileUtils
from utils.test_runner import TestRunner

mapping_sample = FileUtils.get_datadir()


class Test(unittest.TestCase):
    """Run the ``test_1`` sample files through ``TestRunner``."""

    def testOK(self):
        """The check of the ``test_1`` OK samples must succeed."""
        outcome = TestRunner.testOK(mapping_sample, "test_1")
        self.assertTrue(outcome, "file should be valid")

    def testKO(self):
        """The check of the ``test_1`` KO samples must succeed."""
        outcome = TestRunner.testKO(mapping_sample, "test_1")
        self.assertTrue(outcome, "file shouldn't be valid")


if __name__ == "__main__":
    # import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
Example #28
0
def main():
    """Run the XenonMKV command-line conversion from start to finish.

    Steps, in order:

    1. Configure the ``xenonmkv`` logger and build the argument parser.
    2. Print help and exit with status 1 when no command-line argument
       is given.
    3. Parse arguments, optionally merge a configuration file, and set
       the log level.
    4. Normalize the scratch directory, profile, channel count,
       preferred language and source/destination paths.
    5. Check tool dependencies, the source file and the destination
       directory.
    6. Load the MKV file, select audio/video tracks and extract them.
    7. Re-encode audio to AAC when needed and package with MP4Box.
    8. Remove temporary files unless ``--preserve-temp-files`` is set.

    Side effects: assigns the module-level globals ``args`` and ``log``.

    NOTE(review): the code following each ``log_exception`` call uses
    names bound inside the preceding ``try`` block, so ``log_exception``
    (defined elsewhere) presumably terminates the process - confirm.
    """
    # Main program begins
    global args, log, app_path
    log = logging.getLogger("xenonmkv")
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s [%(levelname)s] %(message)s'
    )
    console_handler.setFormatter(formatter)
    log.addHandler(console_handler)

    # External tools; each one also gets a "--<tool>-path" option below.
    dependencies = ('mkvinfo', 'mediainfo', 'mkvextract',
                    'mplayer', 'faac', 'MP4Box')

    parser = argparse.ArgumentParser(description='Parse command line arguments '
                                     'for XenonMKV.')
    parser.add_argument('source_file', help='Path to the source MKV file')
    parser.add_argument('-d', '--destination',
                        help="""Directory to output the destination .mp4 file
                        (default: current directory)""",
                        default='.')
    parser.add_argument('-sd', '--scratch-dir',
                        help="""Specify a scratch directory where temporary files should
                         be stored""",
                        default=None)
    parser.add_argument('-cfg', '--config-file',
                        help="""Provide a configuration file that
                         contains default arguments or settings for the application""",
                        default='')
    parser.add_argument("-p", '--profile',
                        help="""Select a standardized device profile for encoding.
                         Current profile options are: xbox360, playbook""",
                        default="")

    output_group = parser.add_argument_group("Output options")
    output_group.add_argument('-q', '--quiet',
                              help="""Do not display output or progress from tools,
                               or prompt for input""",
                              action='store_true')
    output_group.add_argument('-v', '--verbose', help='Verbose output',
                              action='store_true')
    output_group.add_argument('-vv', '--debug',
                              help='Highly verbose debug output',
                              action='store_true')
    output_group.add_argument('-pf', '--print-file',
                              help='Output filenames before and after converting',
                              action='store_true')

    video_group = parser.add_argument_group("Video options",
                                            "Options for processing video.")
    video_group.add_argument('-nrp', '--no-round-par',
                             help="""When processing video, do not round pixel aspect
                             ratio from 0.98 to 1.01 to 1:1.""",
                             action='store_true')
    video_group.add_argument('-irf', '--ignore-reference-frames',
                             help="""If the source video has too many reference frames
                              to play on low-powered devices (Xbox, PlayBook), continue
                              converting anyway""",
                             action='store_true')

    audio_group = parser.add_argument_group("Audio options",
                                            "Select custom audio decoding and "
                                            "encoding options.")
    # No type= here: "5.1" and "7.1" are accepted as strings and mapped to
    # integers after parsing.
    audio_group.add_argument('-c', '--channels',
                             help="""Specify the maximum number of channels that are
                              acceptable in the output file. Certain devices (Xbox) will
                              not play audio with more than two channels. If the audio
                              needs to be re-encoded at all, it will be downmixed to two
                              channels only. Possible values for this option are 2
                              (stereo); 4 (surround); 5.1 or 6 (full 5.1); 7.1 or 8
                              (full 7.1 audio).
                              For more details, view the README file.""",
                             default=6)
    audio_group.add_argument('-fq', '--faac-quality',
                             help="""Quality setting for FAAC when encoding WAV files
                              to AAC. Defaults to 150 (see
                              http://wiki.hydrogenaudio.org/index.php?title=FAAC)""",
                             default=150)

    track_group = parser.add_argument_group("Track options",
                                            "These options determine how multiple tracks "
                                            "in MKV files are selected.")
    track_group.add_argument('-st', '--select-tracks',
                             help="""If there are multiple tracks in the MKV file, prompt
                             to select which ones will be used. By default, the last video
                              and audio tracks flagged as 'default' in the MKV file will
                             be used. This option requires interactive user input, so do
                             not use it in batch processing or scripts.""",
                             action='store_true')
    track_group.add_argument('-vt', '--video-track',
                             help="""Use the specified video track. If not present in
                             the file, the default track will be used.""",
                             type=int)
    track_group.add_argument('-at', '--audio-track',
                             help="""Use the specified audio track. If not present in
                             the file, the default track will be used.""",
                             type=int)
    track_group.add_argument('-lang', '--preferred-language',
                             help="""Provide a preferred language code in ISO 639-1 format
                              ('en' for English, 'fr' for French, etc.)
                             When picking tracks, this language will be preferred.""")

    proc_group = parser.add_argument_group("File and processing options",
                                           """These options determine how XenonMKV
                                           processes files and their contents.""")
    proc_group.add_argument('-rp', '--resume-previous',
                            help="""Resume a previous run (do not recreate files
                            if they already exist). Useful for debugging quickly if a
                            conversion has already partially succeeded.""",
                            action='store_true')
    proc_group.add_argument('-n', '--name',
                            help="""Specify a name for the final MP4 container.
                            Defaults to the original file name.""",
                            default="")
    proc_group.add_argument('-tn', '--tag-name',
                            help="""Specify a tag name for the final MP4 container.
                            Defaults to the original file name.""",
                            default="")
    # Help text fixed: this option sets the tag comment, not the tag name.
    proc_group.add_argument('-tc', '--tag-comment',
                            help="""Specify a tag comment for the final MP4 container.""",
                            default="")
    proc_group.add_argument('-preserve', '--preserve-temp-files',
                            help="""Preserve temporary files on the filesystem rather
                             than deleting them at the end of each run.""",
                            action='store_true', default=False)
    proc_group.add_argument("-eS", "--error-filesize",
                            help="""Stop processing this file if it is over 4GiB.
                             Files of this size will not be processed correctly by some
                             devices such as the Xbox 360, and they will not save
                             correctly to FAT32-formatted storage. By default, you will
                             only see a warning message, and processing will continue.""",
                            action="store_true")
    proc_group.add_argument('--mp4box-retries',
                            help="""Set the number of retry attempts for MP4Box to attempt
                             to create a file (default: 3)""", default=3, type=int)

    dep_group = parser.add_argument_group("Custom paths",
                                          "Set custom paths for the utilities used by "
                                          "XenonMKV.")
    for dependency in dependencies:
        dep_group.add_argument("--{0}-path".format(dependency.lower()),
                               help="""Set a custom complete path for the {0} tool.
                                Any library under that path will also be
                                loaded.""".format(dependency))

    # With no arguments at all, show usage instead of an argparse error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    config_file_output = False

    # If a configuration file was specified, attempt to read it.
    if args.config_file and os.path.isfile(args.config_file):
        config_file_output = parse_config_file(args)

    # Depending on the arguments, set the logging level appropriately.
    if args.quiet:
        log.setLevel(logging.ERROR)
    elif args.debug:
        log.setLevel(logging.DEBUG)
        log.debug("Using debug/highly verbose mode output")
    elif args.verbose:
        log.setLevel(logging.INFO)

    # If we parsed a configuration file, run through all logging output
    # (replayed only now, after the log level has been set).
    if config_file_output:
        for level, message in config_file_output:
            getattr(log, level)(message)

    # Pick temporary/scratch directory
    if not args.scratch_dir:
        if "TEMP" in os.environ:
            args.scratch_dir = os.environ["TEMP"]
        elif os.path.isdir("/var/tmp"):
            args.scratch_dir = "/var/tmp"
        else:
            args.scratch_dir = os.curdir

    # Apply selected profile. Any non-empty profile forces stereo output and
    # the file-size check; the profile name itself is not inspected here.
    if args.profile:
        args.channels = 2
        args.error_filesize = True

    # Check for 5.1/7.1 audio with the channels setting
    if args.channels == "5.1":
        args.channels = 6
    elif args.channels == "7.1":
        args.channels = 8
    if args.channels not in ('2', '4', '6', '8', 2, 4, 6, 8):
        log.warning("An invalid number of channels was specified. "
                    "Falling back to 2-channel stereo audio.")
        args.channels = 2

    # Enforce channels as integer for comparison purposes later on
    args.channels = int(args.channels)

    # Ensure preferred language, if present, is lowercased and 2 characters
    if args.preferred_language:
        args.preferred_language = args.preferred_language.lower()
        if len(args.preferred_language) < 2:
            log.warning("Could not set preferred language code '{0}'".format(
                        args.preferred_language))
            args.preferred_language = None
        elif len(args.preferred_language) > 2:
            args.preferred_language = args.preferred_language[0:2]
            log.warning("Preferred language code truncated to '{0}'".format(
                        args.preferred_language))

    # Make sure user is not prompted for input if quiet option is used
    if args.quiet and args.select_tracks:
        log.warning("Cannot use interactive track selection in quiet mode. "
                    "Tracks will be automatically selected.")
        args.select_tracks = False

    log.debug("Starting XenonMKV")

    # Check if we have a full file path or are just specifying a file
    if os.sep not in args.source_file:
        log.debug("Ensuring that we have a complete path to {0}".format(
                  args.source_file))
        args.source_file = os.path.join(os.getcwd(), args.source_file)
        log.debug("{0} will be used to reference the original MKV file".format(
                  args.source_file))

    # Always ensure destination path ends with a slash
    if not args.destination.endswith(os.sep):
        args.destination += os.sep

    if not args.scratch_dir.endswith(os.sep):
        args.scratch_dir += os.sep

    # Initialize file utilities
    f_utils = FileUtils(log, args)

    # Check if all dependent applications are installed and available in PATH,
    # or if they are specified.
    # If so, store them in args.tool_paths so all classes
    # have access to them as needed
    (args.tool_paths, args.library_paths) = f_utils.check_dependencies(
        dependencies
    )

    # Check if source file exists and is an appropriate size
    try:
        f_utils.check_source_file(args.source_file)
    except IOError as e:
        log_exception("check_source_file", e)

    source_basename = os.path.basename(args.source_file)
    log.debug("Source Basename: {0}".format(source_basename))
    # NOTE(review): despite its name, source_noext still carries the file
    # extension, so dest_path below ends in e.g. ".mkv.mp4" - confirm that
    # this is intended.
    source_noext = source_basename

    if not args.name:
        args.name = source_noext
        log.debug("Using '{0}' as final container name".format(args.name))

    if not args.tag_name:
        args.tag_name = source_noext
        log.debug("Using '{0}' as tag name".format(args.tag_name))

    # Check if destination directory exists
    try:
        f_utils.check_dest_dir(args.destination)
    except IOError as e:
        log_exception("check_dest_dir", e)

    log.info("Loading source file {0}".format(args.source_file))

    # print() with one parenthesized argument behaves identically on
    # Python 2 and Python 3.
    if args.print_file:
        print("Processing: {0}".format(args.source_file))

    try:
        to_convert = MKVFile(args.source_file, log, args)
        to_convert.get_mkvinfo()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("get_mkvinfo", e)

    # If the user knows which A/V tracks they want, set them.
    # MKVFile will not overwrite them.
    try_set_video_track(to_convert)
    try_set_audio_track(to_convert)

    try:
        # Check for multiple tracks
        if to_convert.has_multiple_av_tracks():
            log.debug("Source file {0} has multiple audio or "
                      "video tracks".format(args.source_file))

            # First, pick default tracks,
            # which can be overridden in select_tracks
            to_convert.set_default_av_tracks()

            if args.select_tracks:
                video_tracks = to_convert.video_track_list()
                audio_tracks = to_convert.audio_track_list()
                if len(video_tracks) > 1:
                    args.video_track = select_track("video", video_tracks)
                    try_set_video_track(to_convert)
                if len(audio_tracks) > 1:
                    args.audio_track = select_track("audio", audio_tracks)
                    try_set_audio_track(to_convert)
            else:
                log.debug("Selected default audio and video tracks")

        else:
            # Pick default (or only) audio/video tracks
            log.debug("Source file {0} has 1 audio and 1 video track; "
                      "using these".format(args.source_file))
            to_convert.set_default_av_tracks()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("set_default_av_tracks", e)

    # Next phase: Extract MKV files to scratch directory
    try:
        (video_file, audio_file) = to_convert.extract_mkv()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("extract_mkv", e)

    # If needed, hex edit the video file to make it compliant
    # with a lower h264 profile level
    if video_file.endswith(".h264"):
        f_utils.hex_edit_video_file(video_file)

    # Detect which audio codec is in place and dump audio to WAV accordingly
    if not audio_file.endswith(".aac"):
        log.debug("Audio track {0} needs to be re-encoded".format(audio_file))

        # use the appropriate AAC encoder to transform it to .aac
        enc = AACEncoder(args.scratch_dir, log, args)
        enc.encode()
        encoded_audio = os.path.join(args.scratch_dir, "audiodump.aac")
    else:
        # The audio track does not need to be re-encoded.
        # Reference the already-valid audio file and put it into the MP4 container.
        log.debug("Audio track {0} does not needs to be re-encoded".format(audio_file))
        encoded_audio = audio_file

    # Now, throw things back together into a .mp4 container with MP4Box.
    video_track = to_convert.get_video_track()
    mp4box = MP4Box(video_file, encoded_audio, video_track.frame_rate,
                    video_track.pixel_ar, args, log)
    try:
        mp4box.package()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("package", e)

    # Path reported to the user; nothing is moved or written here.
    dest_path = os.path.join(args.destination, source_noext + ".mp4")

    log.info("Processing of {0} complete; file saved as {1}".format(
             args.source_file, dest_path))

    # Delete temporary files if possible
    if not args.preserve_temp_files:
        cleanup_temp_files()

    log.debug("XenonMKV completed processing")
    if args.print_file:
        print("Completed: {0}".format(dest_path))
Example #29
0
from utils.file_utils import FileUtils
from utils.general_utils import GeneralUtils
from pandas import DataFrame

# Load the word list; only the first column of the sheet is used.
df = FileUtils.read_excel_file('resources/persian-swear-words.xlsx')
swear_words = df.iloc[:, 0].tolist()

# Every character of every word must appear in this literal; report each
# offending (word, character) pair.
allowed_chars = "آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیيى ‌"
for word in swear_words:
    for char in word:
        if char in allowed_chars:
            continue
        print(word, ',', char)

# Sort the words with the project's Persian-aware sorter and write them out
# as single-cell rows under one column header.
sorted_lst = [[word] for word in GeneralUtils.sort_lis_in_persian(swear_words)]
FileUtils.write_lists2excel_file(sorted_lst, 'resources/test.xlsx', ['واژه'])
Example #30
0
def cleanup_temp_files():
    """Delete temporary files from the scratch directory.

    Reads the module-level ``args`` and ``log``. Does nothing when
    ``args.preserve_temp_files`` is truthy; otherwise asks ``FileUtils``
    to delete files in ``args.scratch_dir`` that match the extensions
    reported by ``MKVTrack.get_possible_extensions()``.
    """
    if args.preserve_temp_files:
        return

    track = MKVTrack(log)
    file_utils = FileUtils(log, args)
    file_utils.delete_temp_files(args.scratch_dir,
                                 track.get_possible_extensions())
Example #31
0
def main():
    """Run the XenonMKV command-line conversion from start to finish.

    Steps, in order:

    1. Configure the ``xenonmkv`` logger and build the argument parser.
    2. Print help and exit with status 1 when no command-line argument
       is given.
    3. Parse arguments, optionally merge a configuration file, and set
       the log level.
    4. Normalize the scratch directory, profile, channel count,
       preferred language and source/destination paths.
    5. Check tool dependencies, the source file and the destination
       directory.
    6. Load the MKV file, select audio/video tracks and extract them.
    7. Re-encode audio to AAC when needed and package with MP4Box.
    8. Remove temporary files unless ``--preserve-temp-files`` is set.

    Side effects: assigns the module-level globals ``args`` and ``log``.

    NOTE(review): the code following each ``log_exception`` call uses
    names bound inside the preceding ``try`` block, so ``log_exception``
    (defined elsewhere) presumably terminates the process - confirm.
    """
    # Main program begins
    global args, log, app_path
    log = logging.getLogger("xenonmkv")
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s [%(levelname)s] %(message)s')
    console_handler.setFormatter(formatter)
    log.addHandler(console_handler)

    # External tools; each one also gets a "--<tool>-path" option below.
    dependencies = ('mkvinfo', 'mediainfo', 'mkvextract', 'mplayer', 'faac',
                    'MP4Box')

    parser = argparse.ArgumentParser(
        description='Parse command line arguments '
        'for XenonMKV.')
    parser.add_argument('source_file', help='Path to the source MKV file')
    parser.add_argument('-d',
                        '--destination',
                        help="""Directory to output the destination .mp4 file
                        (default: current directory)""",
                        default='.')
    parser.add_argument(
        '-sd',
        '--scratch-dir',
        help="""Specify a scratch directory where temporary files should
                         be stored""",
        default=None)
    parser.add_argument('-cfg',
                        '--config-file',
                        help="""Provide a configuration file that
                         contains default arguments or settings for the application""",
                        default='')
    parser.add_argument(
        "-p",
        '--profile',
        help="""Select a standardized device profile for encoding.
                         Current profile options are: xbox360, playbook""",
        default="")

    output_group = parser.add_argument_group("Output options")
    output_group.add_argument(
        '-q',
        '--quiet',
        help="""Do not display output or progress from tools,
                               or prompt for input""",
        action='store_true')
    output_group.add_argument('-v',
                              '--verbose',
                              help='Verbose output',
                              action='store_true')
    output_group.add_argument('-vv',
                              '--debug',
                              help='Highly verbose debug output',
                              action='store_true')
    output_group.add_argument(
        '-pf',
        '--print-file',
        help='Output filenames before and after converting',
        action='store_true')

    video_group = parser.add_argument_group("Video options",
                                            "Options for processing video.")
    video_group.add_argument(
        '-nrp',
        '--no-round-par',
        help="""When processing video, do not round pixel aspect
                             ratio from 0.98 to 1.01 to 1:1.""",
        action='store_true')
    video_group.add_argument(
        '-irf',
        '--ignore-reference-frames',
        help="""If the source video has too many reference frames
                              to play on low-powered devices (Xbox, PlayBook), continue
                              converting anyway""",
        action='store_true')

    audio_group = parser.add_argument_group(
        "Audio options", "Select custom audio decoding and "
        "encoding options.")
    # No type= here: "5.1" and "7.1" are accepted as strings and mapped to
    # integers after parsing.
    audio_group.add_argument(
        '-c',
        '--channels',
        help="""Specify the maximum number of channels that are
                              acceptable in the output file. Certain devices (Xbox) will
                              not play audio with more than two channels. If the audio
                              needs to be re-encoded at all, it will be downmixed to two
                              channels only. Possible values for this option are 2
                              (stereo); 4 (surround); 5.1 or 6 (full 5.1); 7.1 or 8
                              (full 7.1 audio).
                              For more details, view the README file.""",
        default=6)
    audio_group.add_argument(
        '-fq',
        '--faac-quality',
        help="""Quality setting for FAAC when encoding WAV files
                              to AAC. Defaults to 150 (see
                              http://wiki.hydrogenaudio.org/index.php?title=FAAC)""",
        default=150)

    track_group = parser.add_argument_group(
        "Track options", "These options determine how multiple tracks "
        "in MKV files are selected.")
    track_group.add_argument(
        '-st',
        '--select-tracks',
        help="""If there are multiple tracks in the MKV file, prompt
                             to select which ones will be used. By default, the last video
                              and audio tracks flagged as 'default' in the MKV file will
                             be used. This option requires interactive user input, so do
                             not use it in batch processing or scripts.""",
        action='store_true')
    track_group.add_argument(
        '-vt',
        '--video-track',
        help="""Use the specified video track. If not present in
                             the file, the default track will be used.""",
        type=int)
    track_group.add_argument(
        '-at',
        '--audio-track',
        help="""Use the specified audio track. If not present in
                             the file, the default track will be used.""",
        type=int)
    track_group.add_argument(
        '-lang',
        '--preferred-language',
        help="""Provide a preferred language code in ISO 639-1 format
                              ('en' for English, 'fr' for French, etc.)
                             When picking tracks, this language will be preferred."""
    )

    proc_group = parser.add_argument_group(
        "File and processing options", """These options determine how XenonMKV
                                           processes files and their contents."""
    )
    proc_group.add_argument(
        '-rp',
        '--resume-previous',
        help="""Resume a previous run (do not recreate files
                            if they already exist). Useful for debugging quickly if a
                            conversion has already partially succeeded.""",
        action='store_true')
    proc_group.add_argument('-n',
                            '--name',
                            help="""Specify a name for the final MP4 container.
                            Defaults to the original file name.""",
                            default="")
    proc_group.add_argument(
        '-tn',
        '--tag-name',
        help="""Specify a tag name for the final MP4 container.
                            Defaults to the original file name.""",
        default="")
    # Help text fixed: this option sets the tag comment, not the tag name.
    proc_group.add_argument(
        '-tc',
        '--tag-comment',
        help="""Specify a tag comment for the final MP4 container.""",
        default="")
    proc_group.add_argument(
        '-preserve',
        '--preserve-temp-files',
        help="""Preserve temporary files on the filesystem rather
                             than deleting them at the end of each run.""",
        action='store_true',
        default=False)
    proc_group.add_argument(
        "-eS",
        "--error-filesize",
        help="""Stop processing this file if it is over 4GiB.
                             Files of this size will not be processed correctly by some
                             devices such as the Xbox 360, and they will not save
                             correctly to FAT32-formatted storage. By default, you will
                             only see a warning message, and processing will continue.""",
        action="store_true")
    proc_group.add_argument(
        '--mp4box-retries',
        help="""Set the number of retry attempts for MP4Box to attempt
                             to create a file (default: 3)""",
        default=3,
        type=int)

    dep_group = parser.add_argument_group(
        "Custom paths", "Set custom paths for the utilities used by "
        "XenonMKV.")
    for dependency in dependencies:
        dep_group.add_argument(
            "--{0}-path".format(dependency.lower()),
            help="""Set a custom complete path for the {0} tool.
                                Any library under that path will also be
                                loaded.""".format(dependency))

    # With no arguments at all, show usage instead of an argparse error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    config_file_output = False

    # If a configuration file was specified, attempt to read it.
    if args.config_file and os.path.isfile(args.config_file):
        config_file_output = parse_config_file(args)

    # Depending on the arguments, set the logging level appropriately.
    if args.quiet:
        log.setLevel(logging.ERROR)
    elif args.debug:
        log.setLevel(logging.DEBUG)
        log.debug("Using debug/highly verbose mode output")
    elif args.verbose:
        log.setLevel(logging.INFO)

    # If we parsed a configuration file, run through all logging output
    # (replayed only now, after the log level has been set).
    if config_file_output:
        for level, message in config_file_output:
            getattr(log, level)(message)

    # Pick temporary/scratch directory
    if not args.scratch_dir:
        if "TEMP" in os.environ:
            args.scratch_dir = os.environ["TEMP"]
        elif os.path.isdir("/var/tmp"):
            args.scratch_dir = "/var/tmp"
        else:
            args.scratch_dir = os.curdir

    # Apply selected profile. Any non-empty profile forces stereo output and
    # the file-size check; the profile name itself is not inspected here.
    if args.profile:
        args.channels = 2
        args.error_filesize = True

    # Check for 5.1/7.1 audio with the channels setting
    if args.channels == "5.1":
        args.channels = 6
    elif args.channels == "7.1":
        args.channels = 8
    if args.channels not in ('2', '4', '6', '8', 2, 4, 6, 8):
        log.warning("An invalid number of channels was specified. "
                    "Falling back to 2-channel stereo audio.")
        args.channels = 2

    # Enforce channels as integer for comparison purposes later on
    args.channels = int(args.channels)

    # Ensure preferred language, if present, is lowercased and 2 characters
    if args.preferred_language:
        args.preferred_language = args.preferred_language.lower()
        if len(args.preferred_language) < 2:
            log.warning("Could not set preferred language code '{0}'".format(
                args.preferred_language))
            args.preferred_language = None
        elif len(args.preferred_language) > 2:
            args.preferred_language = args.preferred_language[0:2]
            log.warning("Preferred language code truncated to '{0}'".format(
                args.preferred_language))

    # Make sure user is not prompted for input if quiet option is used
    if args.quiet and args.select_tracks:
        log.warning("Cannot use interactive track selection in quiet mode. "
                    "Tracks will be automatically selected.")
        args.select_tracks = False

    log.debug("Starting XenonMKV")

    # Check if we have a full file path or are just specifying a file
    if os.sep not in args.source_file:
        log.debug("Ensuring that we have a complete path to {0}".format(
            args.source_file))
        args.source_file = os.path.join(os.getcwd(), args.source_file)
        log.debug("{0} will be used to reference the original MKV file".format(
            args.source_file))

    # Always ensure destination path ends with a slash
    if not args.destination.endswith(os.sep):
        args.destination += os.sep

    if not args.scratch_dir.endswith(os.sep):
        args.scratch_dir += os.sep

    # Initialize file utilities
    f_utils = FileUtils(log, args)

    # Check if all dependent applications are installed and available in PATH,
    # or if they are specified.
    # If so, store them in args.tool_paths so all classes
    # have access to them as needed
    (args.tool_paths,
     args.library_paths) = f_utils.check_dependencies(dependencies)

    # Check if source file exists and is an appropriate size
    try:
        f_utils.check_source_file(args.source_file)
    except IOError as e:
        log_exception("check_source_file", e)

    source_basename = os.path.basename(args.source_file)
    log.debug("Source Basename: {0}".format(source_basename))
    # NOTE(review): despite its name, source_noext still carries the file
    # extension, so dest_path below ends in e.g. ".mkv.mp4" - confirm that
    # this is intended.
    source_noext = source_basename

    if not args.name:
        args.name = source_noext
        log.debug("Using '{0}' as final container name".format(args.name))

    if not args.tag_name:
        args.tag_name = source_noext
        log.debug("Using '{0}' as tag name".format(args.tag_name))

    # Check if destination directory exists
    try:
        f_utils.check_dest_dir(args.destination)
    except IOError as e:
        log_exception("check_dest_dir", e)

    log.info("Loading source file {0}".format(args.source_file))

    # print() with one parenthesized argument behaves identically on
    # Python 2 and Python 3.
    if args.print_file:
        print("Processing: {0}".format(args.source_file))

    try:
        to_convert = MKVFile(args.source_file, log, args)
        to_convert.get_mkvinfo()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("get_mkvinfo", e)

    # If the user knows which A/V tracks they want, set them.
    # MKVFile will not overwrite them.
    try_set_video_track(to_convert)
    try_set_audio_track(to_convert)

    try:
        # Check for multiple tracks
        if to_convert.has_multiple_av_tracks():
            log.debug("Source file {0} has multiple audio or "
                      "video tracks".format(args.source_file))

            # First, pick default tracks,
            # which can be overridden in select_tracks
            to_convert.set_default_av_tracks()

            if args.select_tracks:
                video_tracks = to_convert.video_track_list()
                audio_tracks = to_convert.audio_track_list()
                if len(video_tracks) > 1:
                    args.video_track = select_track("video", video_tracks)
                    try_set_video_track(to_convert)
                if len(audio_tracks) > 1:
                    args.audio_track = select_track("audio", audio_tracks)
                    try_set_audio_track(to_convert)
            else:
                log.debug("Selected default audio and video tracks")

        else:
            # Pick default (or only) audio/video tracks
            log.debug("Source file {0} has 1 audio and 1 video track; "
                      "using these".format(args.source_file))
            to_convert.set_default_av_tracks()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("set_default_av_tracks", e)

    # Next phase: Extract MKV files to scratch directory
    try:
        (video_file, audio_file) = to_convert.extract_mkv()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("extract_mkv", e)

    # If needed, hex edit the video file to make it compliant
    # with a lower h264 profile level
    if video_file.endswith(".h264"):
        f_utils.hex_edit_video_file(video_file)

    # Detect which audio codec is in place and dump audio to WAV accordingly
    if not audio_file.endswith(".aac"):
        log.debug("Audio track {0} needs to be re-encoded".format(audio_file))

        # use the appropriate AAC encoder to transform it to .aac
        enc = AACEncoder(args.scratch_dir, log, args)
        enc.encode()
        encoded_audio = os.path.join(args.scratch_dir, "audiodump.aac")
    else:
        # The audio track does not need to be re-encoded.
        # Reference the already-valid audio file and put it into the MP4 container.
        log.debug("Audio track {0} does not needs to be re-encoded".format(
            audio_file))
        encoded_audio = audio_file

    # Now, throw things back together into a .mp4 container with MP4Box.
    video_track = to_convert.get_video_track()
    mp4box = MP4Box(video_file, encoded_audio, video_track.frame_rate,
                    video_track.pixel_ar, args, log)
    try:
        mp4box.package()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("package", e)

    # Path reported to the user; nothing is moved or written here.
    dest_path = os.path.join(args.destination, source_noext + ".mp4")

    log.info("Processing of {0} complete; file saved as {1}".format(
        args.source_file, dest_path))

    # Delete temporary files if possible
    if not args.preserve_temp_files:
        cleanup_temp_files()

    log.debug("XenonMKV completed processing")
    if args.print_file:
        print("Completed: {0}".format(dest_path))
Example #32
0
 def __init__(self):
     """Set the project source root and create the helper objects.

     ``root_path`` is ``~/PycharmProjects/python-vistalytics/source``
     with ``~`` expanded for the current user.
     """
     home_dir = expanduser('~')
     path_parts = ("PycharmProjects", "python-vistalytics", "source")
     self.root_path = join(home_dir, *path_parts)
     self.math_utils = MathUtils()
     self.file_utils = FileUtils()
Example #33
0
import sys, os
from utils.logger_setup import LoggerSetup
from utils.file_utils import FileUtils

# Directory locations resolved once, at import time, through FileUtils.
# NOTE(review): presumably these are filesystem paths for the data, project
# and schema directories -- confirm against utils.file_utils.
data_dir =  FileUtils.get_datadir()
project_dir = FileUtils.get_projectdir()
schema_dir = FileUtils.get_schemadir()

# Module-level logger obtained from LoggerSetup; set_info_level() is invoked
# immediately afterwards (presumably switching the log threshold to INFO --
# verify in utils.logger_setup).
logger = LoggerSetup.get_logger()
LoggerSetup.set_info_level()

# Emitted every time this module is imported.
logger.info("utils package intialized")
def run():
    """Run the collaborative-filtering movie recommender demo end to end.

    Steps performed:
      1. Load ``Y``/``R`` from ``./data/ex8_movies.mat`` and ``X``/``Theta``
         from ``./data/ex8_movieParams.mat`` (paths resolved relative to this
         file through ``FileUtils.get_abs_path``).
      2. Print the average rating of the first movie and evaluate
         ``cofi_cost_function`` on a small slice of the data with
         lambda = 0 and lambda = 1.5.
      3. Prepend a column of hand-written ratings for a new user, normalise
         the ratings and fit new parameters with ``scipy.optimize.minimize``
         (method ``TNC``, at most 100 iterations).
      4. Print the ten highest predicted ratings for the new user.

    Takes no arguments and returns ``None``; every result is printed.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex8_movies.mat")
    mat3 = loadmat(data_path)

    data_path = FileUtils.get_abs_path(__file__, "./data/ex8_movieParams.mat")
    mat4 = loadmat(data_path)

    # Shapes as noted by the original author:
    #   Y:     1682 x 943, ratings (1-5) of 1682 movies by 943 users
    #   R:     1682 x 943, R(i, j) = 1 iff user j rated movie i
    #   X:     1682 x 10,  num_movies x num_features movie features
    #   Theta: 943 x 10,   num_users x num_features user features
    Y = mat3["Y"]
    R = mat3["R"]
    X = mat4["X"]
    Theta = mat4["Theta"]

    # Compute average rating
    print("Average rating for movie 1 (Toy Story):",
          np.sum(Y[0, :] * R[0, :]) / np.sum(R[0, :]), "/5")

    # Reduce the data set size to run faster
    num_users, num_movies, num_features = 4, 5, 3
    X_test = X[:num_movies, :num_features]
    Theta_test = Theta[:num_users, :num_features]
    Y_test = Y[:num_movies, :num_users]
    R_test = R[:num_movies, :num_users]
    params = np.append(X_test.flatten(), Theta_test.flatten())

    # Evaluate cost function (the gradients are returned but not used here)
    J, _ = cofi_cost_function(params, Y_test, R_test, num_users, num_movies,
                              num_features, 0)
    print("Cost at loaded parameters:", J)
    J2, _ = cofi_cost_function(params, Y_test, R_test, num_users,
                               num_movies, num_features, 1.5)
    print("Cost at loaded parameters (lambda = 1.5):", J2)

    # Load movie list; the context manager guarantees the handle is closed.
    data_path = FileUtils.get_abs_path(__file__, "./data/movie_ids.txt")
    with open(data_path, "r") as movie_file:
        # The final split element is dropped, as in the original
        # (assumes the file ends with a trailing newline).
        movieList = movie_file.read().split("\n")[:-1]

    # Initialize my ratings: one row per movie, sized from Y because the
    # column is stacked next to Y below and must have the same row count.
    my_ratings = np.zeros((Y.shape[0], 1))
    # Create own ratings (movie row index -> rating)
    own_ratings = {
        0: 4,
        97: 2,
        6: 3,
        11: 5,
        53: 4,
        63: 5,
        65: 3,
        68: 5,
        82: 4,
        225: 5,
        354: 5,
    }
    for movie_idx, stars in own_ratings.items():
        my_ratings[movie_idx] = stars

    print("New user ratings:\n")
    # Iterate over scalar values of the single column; converting a
    # 1-element array with int() is deprecated in recent NumPy releases.
    for movie_idx, stars in enumerate(my_ratings[:, 0]):
        if stars > 0:
            print("Rated", int(stars), "for index", movieList[movie_idx])

    # The new user becomes column 0 of both matrices.
    Y = np.hstack((my_ratings, Y))
    R = np.hstack((my_ratings != 0, R))
    # Normalize Ratings
    Ynorm, Ymean = normalize_ratings(Y, R)

    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = 10
    # Set initial Parameters (Theta,X)
    X = np.random.randn(num_movies, num_features)
    Theta = np.random.randn(num_users, num_features)
    initial_parameters = np.append(X.flatten(), Theta.flatten())
    Lambda = 10

    options = {'maxiter': 100}
    result = op.minimize(fun=cofi_cost_function,
                         x0=initial_parameters,
                         args=(Ynorm, R, num_users, num_movies, num_features,
                               Lambda),
                         method='TNC',
                         jac=True,
                         options=options)
    paramsFinal = result.x

    # Unroll the flat solution vector back into X and Theta.
    X = paramsFinal[0:num_movies * num_features].reshape(
        num_movies, num_features)
    Theta = paramsFinal[num_movies * num_features:].reshape(
        num_users, num_features)

    p = X @ Theta.T
    # Column 0 of p belongs to the new user; add the mean back.
    # NOTE(review): assumes Ymean is a (num_movies, 1) column -- confirm
    # against normalize_ratings.
    my_predictions = p[:, 0][:, np.newaxis] + Ymean

    # Keep the predictions numeric. Stacking them with the titles in one
    # NumPy array would coerce them to strings and make the sort below
    # lexicographic (e.g. "10.1" < "9.3") instead of numeric.
    df = pd.DataFrame({0: my_predictions[:, 0], 1: movieList})
    df.sort_values(by=[0], ascending=False, inplace=True)
    df.reset_index(drop=True, inplace=True)
    print("Top recommendations for you:\n")
    top = df.head(10)  # tolerates fewer than ten rows
    for predicted, title in zip(top[0], top[1]):
        print("Predicting rating", round(float(predicted), 1), " for index",
              title)