def __init__(self):
    """Set up the source root path, the tree helpers and the initial report type."""
    home_dir = expanduser('~')
    # Project sources live under ~/PycharmProjects/python-vistalytics/source.
    self.root_path = join(home_dir, 'PycharmProjects', 'python-vistalytics', 'source')

    self.tree = Tree()
    self.tree_str = TreeStructure()
    self.file_utils = FileUtils()

    # Initial report type.
    self.report_choice = con.INCOME_STATEMENTS
def _read_csv(self, resource_dir):
    """Read every file found in ``<root_path>/in/csv/<resource_dir>``.

    :param resource_dir: name of the sub-directory below ``in/csv``
    :return: list holding one ``read_csv`` result per file, in the order
        reported by ``get_files``
    """
    csv_dir = join(self.root_path, 'in', 'csv', resource_dir)
    file_names = FileUtils().get_files(csv_dir)
    return [FileUtils().read_csv(csv_dir, name) for name in file_names]
def cleanup_temp_files():
    """Delete temporary files from the scratch directory.

    Does nothing when ``args.preserve_temp_files`` is set.
    """
    global args, log
    if args.preserve_temp_files:
        return

    track = MKVTrack(log)
    file_utils = FileUtils(log, args)
    # Files are selected by the extensions the track helper reports.
    file_utils.delete_temp_files(args.scratch_dir,
                                 track.get_possible_extensions())
def __init__(self):
    """Set up the source root path, the key/sheet name lists and the helpers."""
    home_dir = expanduser("~")
    self.root_path = join(home_dir, "PycharmProjects", "python-vistalytics", "source")

    # Keys listed here are stored as ``ignore_keys`` for later use.
    self.ignore_keys = [
        'Margins % of Sales',
        'Profitability',
        'Cash Flow Ratios',
        'Balance Sheet Items (in %)',
        'Liquidity/Financial Health',
        'Efficiency',
    ]
    # Sheet names, kept in this order.
    self.sheet_list = [
        'Income_Statement',
        'Balance_Sheet',
        'Cash_Flow_Statement',
        'Key_Ratios',
    ]

    self.math_utils = MathUtils()
    self.file_utils = FileUtils()
class CurrentQuarterAnalysis(object):
    """Report rows whose two most recent values differ in being zero.

    The user picks a report type, the matching tree is built, every CSV of
    that report type is loaded, and each row whose key is part of the tree is
    printed when exactly one of its last two values equals zero.
    """

    def __init__(self):
        """Set up the source root path, the tree helpers and the initial report type."""
        self.root_path = join(expanduser('~'), 'PycharmProjects',
                              'python-vistalytics', 'source')
        self.tree = Tree()
        self.tree_str = TreeStructure()
        self.file_utils = FileUtils()
        self.report_choice = con.INCOME_STATEMENTS

    def _read_csv(self, resource_dir):
        """Read every file in ``<root_path>/in/csv/<resource_dir>``.

        :param resource_dir: name of the sub-directory below ``in/csv``
        :return: list with one ``read_csv`` result per file
        """
        src_dir_path = join(self.root_path, 'in', 'csv', resource_dir)
        return [self.file_utils.read_csv(src_dir_path, f)
                for f in self.file_utils.get_files(src_dir_path)]

    def _compare(self, v1, v2):
        """Return True when exactly one of ``v1`` and ``v2`` equals zero."""
        return bool((v1 == 0) != (v2 == 0))

    def process_data(self, data_list):
        """Print every tracked row whose last two values differ in being zero.

        The ``con.TTM`` column, when present, is dropped from each frame in
        place before the rows are inspected.

        :param data_list: iterable of data frames as returned by ``_read_csv``
        """
        for data in data_list:
            if con.TTM in data.columns:
                # Keyword form: the positional ``axis`` argument was removed
                # from DataFrame.drop in pandas 2.0.
                data.drop(columns=con.TTM, inplace=True)
            for i, v in data.iterrows():
                if self.tree_str.get_string(i) not in self.tree:
                    continue
                size_val = len(v)
                # .iloc makes the positional intent explicit; plain integer
                # keys on a labelled Series are deprecated.
                previous = v.iloc[size_val - 2]
                latest = v.iloc[size_val - 1]
                if self._compare(previous, latest):
                    print("#############################################")
                    print("Index: " + str(i) + " " + str(previous) + " : " + str(latest))

    def run(self):
        """Ask for the report type, build its tree and analyse the matching CSVs.

        Any input that is not an integer, or is not a key of
        ``report_choices``, falls back to the income statement.
        """
        print(
            "Choices :\n\n\tDefault: Income Statement\n\t1: Balance Statement\n\t2: Cash Flow Statement"
        )
        c = input()
        try:
            self.report_choice = self.tree_str.report_choices[int(c)]
            # Compare by value: identity (`is`) only works when both names
            # happen to refer to the same object.
            if self.report_choice == con.BALANCE_STATEMENTS:
                self.tree_str.get_balance_tree()
            else:
                self.tree_str.get_cash_flow_tree()
        except (KeyError, ValueError):
            self.report_choice = con.INCOME_STATEMENTS
            self.tree_str.get_income_tree()

        self.tree = self.tree_str.tree
        data_list = self._read_csv(self.report_choice)
        print(self.tree)
        self.process_data(data_list)
def download_photos(self, filepath, results):
    """Download every photo referenced by ``results`` next to ``filepath``.

    Each photo is written to ``<filepath><index>.<ext>`` where ``index`` is
    the position of the link in ``results["photos"]`` and ``ext`` is the text
    after the last dot of the link.

    :param filepath: path prefix the numbered photo files are written under
    :param results: mapping that may contain a ``"photos"`` list of links
    """
    # ``dict.has_key`` no longer exists on Python 3; ``in`` works on 2 and 3.
    if "photos" not in results:
        print("[*] No photos to download.")
        return

    for i, photo_link in enumerate(results["photos"]):
        res = HTTPUtils(photo_link).make_request()
        ext = photo_link.split(".")[-1]
        fu = FileUtils(filepath + str(i) + "." + ext)
        fu.write_binary(res.content)
def completar_info_basica(self, img, fecha):
    """Fill in the basic-information form and accept it.

    Sets the certificate date through jQuery, attaches ``img`` via the GUI
    file picker, ticks the terms checkbox and confirms. A follow-up
    confirmation button is clicked when it appears.

    :param img: image handed to ``FileUtils.seleccionar_img_gui``
    :param fecha: date string written into ``#fecha_certificado``
    """
    time.sleep(5)
    set_date_script = '$("#fecha_certificado").val("' + fecha + '")'
    self.driver.execute_script(set_date_script)
    time.sleep(5)

    add_image_button = self.find_element(self.__locators.AGREGAR_IMG_BTN)
    add_image_button.click()
    FileUtils.seleccionar_img_gui(img)

    terms_checkbox = self.find_element(self.__locators.TERMIN_CONDIC_INP)
    terms_checkbox.click()
    accept_button = self.find_element(self.__locators.ACEPTAR_BTN)
    accept_button.click()

    try:
        warning_accept_button = self.find_element(self.__locators.ACEPTAR_ADV_BTN)
        warning_accept_button.click()
    except TimeoutException:
        # The extra confirmation is optional; a timeout is ignored.
        pass
def run(self):
    """Run the search and store every new result on disk.

    Results are processed in order until the link recorded as ``last_result``
    by a previous run is reached. For every processed result the text and
    photos are saved in a per-result directory, and the first link of this
    search is then recorded as the new ``last_result``.

    :return: the parsed data of the last result that was fetched, or ``None``
    """
    results = None
    if self.cf:
        print("[-] Still not implemented. Could not read from file.")
        return results

    try:
        url, params = self.build_request()
        search_page = HTTPUtils(url, params=params).make_request()
        links = ParsingEngine.parse_search(search_page.content, self.engine)
        if not links:
            print("[-] Search did not return results. Exiting...")
            exit()

        # Check if search with these params was made 4 this engine
        search_filepath = self.check_or_create_params_dir()
        search_filepath += "/"
        # Check if last_result exist
        last_result = self.check_last_result(search_filepath)
        # Check if today, a search was done with these params
        today_filepath = self.check_or_create_today_dir(search_filepath)
        today_filepath += "/"

        processed = 0
        for link in links:
            try:
                if last_result == link:
                    print("[ * Result from prior searches found * ]")
                    break

                result_page = HTTPUtils(link).make_request()
                results = ParsingEngine.parse_result(result_page.content, self.engine)
                if not results:
                    print("[-] No data retrieved for result: %s" % link)
                    continue

                lines = self.build_text(results, link)
                result_dir, already_saved = self.check_or_create_res_dir(
                    today_filepath, link, processed)
                result_dir += "/"
                if already_saved:
                    # result already saved
                    continue

                self.download_photos(result_dir, results)
                info_file = FileUtils(result_dir + "info.txt")
                # Encode with the page encoding when the response reports one.
                if result_page.encoding:
                    info_file.write(lines.encode(result_page.encoding))
                else:
                    info_file.write(lines)
                processed += 1
            except Exception as err:
                print("[-] Search result: %s could not be processed: %s" % (link, str(err)))
                import os
                import sys
                exc_type, _, exc_tb = sys.exc_info()
                fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
                print(exc_type, fname, exc_tb.tb_lineno)

        if links and links[0]:
            marker_file = FileUtils(search_filepath + "last_result")
            marker_file.write(links[0])
        print("[+] %s results processed" % str(processed))
    except Exception as err:
        print("[-] Something went wrong: %s" % err)
    return results
def run():
    """Train a linear SVC on the spam data set and classify a sample e-mail.

    Steps: load the sample e-mail and the vocabulary, print the feature
    statistics of the sample, fit the classifier on ``spamTrain.mat``, print
    training and test accuracy, then predict ``spamSample1.txt``.
    """
    file_path = FileUtils.get_abs_path(__file__, "./data/emailSample1.txt")
    vocab_path = FileUtils.get_abs_path(__file__, "./data/vocab.txt")
    # Context managers close the files; the bare open().read() leaked handles.
    with open(file_path, "r") as email_file:
        file_contents = email_file.read()
    with open(vocab_path, "r") as vocab_file:
        # The element after the final newline is dropped.
        vocabList = vocab_file.read().split("\n")[:-1]

    # Each vocabulary line is "<value>\t<key>"; the dict maps key -> value.
    vocabList_d = {}
    for ea in vocabList:
        value, key = ea.split("\t")
        vocabList_d[key] = value

    print(file_contents)
    word_indices = process_email(file_contents, vocabList_d)
    features = email_features(word_indices, vocabList_d)
    print("Length of feature vector: ", len(features))
    print("Number of non-zero entries: ", np.sum(features))

    spam_mat_path = FileUtils.get_abs_path(__file__, "./data/spamTrain.mat")
    spam_mat = loadmat(spam_mat_path)
    X_train = spam_mat["X"]
    y_train = spam_mat["y"]

    C = 0.1
    # Pass the named constant instead of repeating the literal.
    spam_svc = SVC(C=C, kernel="linear")
    spam_svc.fit(X_train, y_train.ravel())
    print("Training Accuracy:", (spam_svc.score(X_train, y_train.ravel())) * 100, "%")

    spam_mat_test_path = FileUtils.get_abs_path(__file__, "./data/spamTest.mat")
    spam_mat_test = loadmat(spam_mat_test_path)
    X_test = spam_mat_test["Xtest"]
    y_test = spam_mat_test["ytest"]
    print("Test Accuracy:", (spam_svc.score(X_test, y_test.ravel())) * 100, "%")

    file_path = FileUtils.get_abs_path(__file__, "./data/spamSample1.txt")
    with open(file_path, "r") as spam_file:
        file_contents = spam_file.read()
    word_indices = process_email(file_contents, vocabList_d)
    features = email_features(word_indices, vocabList_d)
    # One sample, as many columns as there are features (was hard-coded 1899).
    features = features.reshape(1, -1)
    print(spam_svc.predict(features))
    print('1 is spam, 0 is not spam')
def run():
    """Load stored weights, predict on ``ex3data1.mat`` and print the accuracy."""
    weights = loadmat(FileUtils.get_abs_path(__file__, "./data/ex3weights.mat"))
    Theta1, Theta2 = weights['Theta1'], weights['Theta2']

    np.set_printoptions(suppress=True)

    samples = loadmat(FileUtils.get_abs_path(__file__, "./data/ex3data1.mat"))
    X, y = samples["X"], samples["y"]

    predictions = predict_nn(Theta1, Theta2, X)
    accuracy = np.mean(predictions == y) * 100
    print("Accuracy on training set with Neural Network:", accuracy)
def run(self):
    """Load the city data and start the crawler once per city code."""
    # Log message: start reading the local region file.
    logger.info('开始获取读取本地地区文件')
    city_data = FileUtils.get_city()
    # Log message: start fetching merchant review data.
    logger.info('开始获取商家评论数据')
    for city in city_data['city'].values():
        self.crawler_start(city['code'])
def run():
    """Fit ``cost_function`` on ``ex2data1.txt`` and report the results.

    Prints cost and gradient for a zero theta and for a fixed test theta,
    minimises the cost with the TNC method, prints the training accuracy and
    plots the decision boundary.
    """
    data = np.loadtxt(FileUtils.get_abs_path(__file__, "./data/ex2data1.txt"),
                      delimiter=',')
    n = np.size(data, 1)

    # The last column is the label; all other columns are features.
    labels = data[:, n - 1]
    m = np.size(labels, 0)
    features = np.reshape(data[:, range(n - 1)], [m, n - 1])
    y = np.reshape(labels, [m, 1])
    # Prepend a column of ones to the features.
    x = np.hstack([np.ones([m, 1]), features])

    theta = np.zeros([n, 1])
    cost, grad = cost_function(theta, x, y)
    print("Cost with theta [0;0;0]: ", cost)
    print('Theta Result with [0;0;0]:\n', grad)

    test_theta = str2arr('[-24; 0.2; 0.2]')
    cost, grad = cost_function(test_theta, x, y)
    print("Cost with theta [-24; 0.2; 0.2]: ", cost)
    print('Theta Result with [-24; 0.2; 0.2]:\n', grad)

    # jac=True: cost_function returns the gradient together with the cost.
    result = op.minimize(fun=cost_function, x0=theta, args=(x, y),
                         method='TNC', jac=True)
    optimal_theta = result.x
    print('Optimal theta: ', optimal_theta)

    predictions = predict(optimal_theta, x)
    print("Accuracy:", np.mean(((predictions == y).flatten())) * 100)
    plot_decision_boundary(optimal_theta, x, y)
def run():
    """Show random samples, check the regularised cost and run one-vs-all.

    Displays a 10x10 grid of randomly chosen rows of ``X`` as 20x20 images,
    prints cost and gradients for a small fixed test case next to their
    expected values, then trains one-vs-all classifiers and prints the
    training accuracy.
    """
    np.set_printoptions(suppress=True)
    data_path = FileUtils.get_abs_path(__file__, "./data/ex3data1.mat")
    mat = loadmat(data_path)
    X = mat["X"]
    y = mat["y"]

    fig, axis = plt.subplots(10, 10, figsize=(12, 12))
    # randint's upper bound is exclusive, so bounding it by the row count
    # always yields a valid index. The previous hard-coded 5001 could pick
    # row 5000, which is out of range for a 5000-row X — TODO confirm the
    # data set size.
    num_examples = X.shape[0]
    for i in range(10):
        for j in range(10):
            sample = X[np.random.randint(0, num_examples), :]
            # reshape back to 20 pixel by 20 pixel
            axis[i, j].imshow(sample.reshape(20, 20, order="F"), cmap="hot")
            axis[i, j].axis("off")
    plt.show()

    # Small hand-made test case with known expected cost and gradients.
    theta_t = str2arr('[-2; -1; 1; 2]')
    X_t = np.array([np.linspace(0.1, 1.5, 15)]).reshape(3, 5).T
    X_t = np.hstack((np.ones((5, 1)), X_t))
    y_t = (str2arr('[1;0;1;0;1]'))
    lambda_t = 3
    cost, grad = cost_function_regularized(theta_t, X_t, y_t, lambda_t)
    print("Cost:", cost, "Expected cost: 2.534819")
    print(
        "Gradients:\n", grad,
        "\nExpected gradients:\n 0.146561\n -0.548558\n 0.724722\n 1.398003")

    lambda_value = 0.1
    num_labels = 10
    all_theta = one_vs_all(X, y, num_labels, lambda_value)
    res = predict_one_vs_all(all_theta, X)
    print("Accuracy on training set with OneVsAll:", np.mean((res == y)) * 100)
def get_vector(user_comments_full_path, most_frequent_words):
    """Build the feature vector of one user.

    The vector is the user's basic features followed by one frequency value
    per word of ``most_frequent_words`` (0 when the user never used the word).

    :param user_comments_full_path: path to all comments of a specific user
    :param most_frequent_words: iterable of the most frequent words
    :return: feature vector for the user
    """
    comments = FileUtils.get_list_of_comments(user_comments_full_path)
    word_frequency = dict(FeatureExtractor.get_word_list_frequency(comments))
    basic_features = FeatureExtractor.get_basic_features(comments)

    # Word features follow the iteration order of most_frequent_words.
    word_features = [word_frequency.get(word, 0) for word in most_frequent_words]
    return basic_features + word_features
class TestRunner:
    """Validate sample files against the ``merged-syntax.xsd`` schema."""

    # Shared validator, built once when the class is defined.
    validator = Validator(
        os.path.join(FileUtils.get_schemadir(), "merged-syntax.xsd"))

    @staticmethod
    def testOK(mapping_sample, case_prefix):
        """Check that every ``<case_prefix>_ok*`` file in the folder is valid.

        :param mapping_sample: directory holding the sample files
        :param case_prefix: prefix identifying the test case
        :return: False as soon as one file is reported invalid, else True
        """
        names = os.listdir(mapping_sample)
        wanted_prefix = case_prefix + "_ok"
        for name in names:
            if not name.startswith(wanted_prefix):
                continue
            candidate = os.path.join(mapping_sample, name)
            if TestRunner.validator.validate_file(candidate, verbose=False) is False:
                # Validate again verbosely so the failure details are shown.
                TestRunner.validator.validate_file(candidate, verbose=True)
                logger.error(name + " is not valid, it should be")
                return False
            logger.info(name + " is valid: fine")
        return True

    @staticmethod
    def testKO(mapping_sample, case_prefix):
        """Check that every ``<case_prefix>_ko*`` file in the folder is rejected.

        :param mapping_sample: directory holding the sample files
        :param case_prefix: prefix identifying the test case
        :return: False as soon as one file is reported valid, else True
        """
        wanted_prefix = case_prefix + "_ko"
        names = os.listdir(mapping_sample)
        for name in names:
            if not name.startswith(wanted_prefix):
                continue
            candidate = os.path.join(mapping_sample, name)
            if TestRunner.validator.validate_file(candidate, verbose=False) is True:
                logger.error(name)
                return False
            logger.info(name + " is not valid: fine")
        return True
class QuarterlyAverageChangeAnalysis(object):
    """Write a change report for every CSV in the ``in/quarter`` folder."""

    def __init__(self):
        """Set up the source root path and the math/file helpers."""
        self.root_path = join(expanduser('~'), "PycharmProjects",
                              "python-vistalytics", "source")
        self.math_utils = MathUtils()
        self.file_utils = FileUtils()

    def run(self):
        """Create ``<name>_Quarterly_Report.csv`` for each input file.

        For every row the change and percentage change between the value
        five columns from the end and the last value are computed. The
        ``TTM`` column, when present, is excluded first.
        """
        path = join(self.root_path, "in", 'quarter')
        for f in self.file_utils.get_files(path):
            key_list = []
            change = []
            percent_change = []

            data = self.file_utils.read_csv(path, f)
            if 'TTM' in data:
                # Keyword form: the positional ``axis`` argument was removed
                # from DataFrame.drop in pandas 2.0.
                data = data.drop(columns='TTM')

            for i, v in data.iterrows():
                # Prepare Columns
                key_list.append(i)
                size_val = len(v)
                # .iloc makes the positional intent explicit; plain integer
                # keys on a labelled Series are deprecated.
                start_value = v.iloc[size_val - 5]
                end_value = v.iloc[size_val - 1]
                change.append(self.math_utils.get_change(start_value, end_value))
                percent_change.append(
                    self.math_utils.percentage_change(start_value, end_value))

            # Write csv.
            columns = ['Keys', 'Year Over Year Change', 'Year Over Year Change (%)']
            output_data = pd.DataFrame(
                {
                    'Keys': key_list,
                    'Year Over Year Change': change,
                    'Year Over Year Change (%)': percent_change,
                },
                columns=columns)
            output_dir = join(
                self.root_path, 'out',
                splitext(f)[0].replace(" ", "_") + "_Quarterly_Report.csv")
            self.file_utils.write_csv(output_dir, output_data)
def plot_data():
    """Scatter-plot the two columns of ``ex1data1.txt`` with red ``x`` markers.

    The figure is only prepared here; showing it is left to the caller.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex1data1.txt")
    data = np.loadtxt(data_path, delimiter=',')
    x = data[:, 0]
    y = data[:, 1]
    # ``color`` sets the marker colour; ``cmap='red'`` named a colormap that
    # does not exist and has no effect without a ``c`` array.
    plt.scatter(x, y, marker='x', color='red')
    plt.xlabel("Population of City in 10,000s")
    plt.ylabel('Profit in $10,000s')
def load_project_config():
    """Load the nordlys config file ``<PROJECT_DIR>/config/config.ini``.

    :return: the result of ``FileUtils.load_config``, or ``None`` when the
        file does not exist. Any error while loading is printed and the
        process exits with status 1.
    """
    config_path = os.sep.join([PROJECT_DIR, "config", "config.ini"])
    try:
        if not os.path.exists(config_path):
            return None
        return FileUtils.load_config(config_path)
    except Exception as e:
        print("Error loading config file: ", e)
        sys.exit(1)
def run():
    """Fit and plot SVM classifiers on the three ``ex6data*.mat`` data sets."""
    # Data set 1: linear kernel with C=1.
    mat = loadmat(FileUtils.get_abs_path(__file__, "./data/ex6data1.mat"))
    X, y = mat["X"], mat["y"]
    plot_data(X, y)
    linear_clf = SVC(C=1, kernel="linear")
    linear_clf.fit(X, np.ravel(y))
    plot_svc(linear_clf, X)

    # Print the Gaussian kernel value for two fixed vectors.
    x1 = np.array([1, 2, 1])
    x2 = np.array([0, 4, -1])
    sigma = 2
    print(gaussian_kernel(x1, x2, sigma))

    # Data set 2: RBF kernel with gamma=30.
    data2 = loadmat(FileUtils.get_abs_path(__file__, "./data/ex6data2.mat"))
    y2, X2 = data2['y'], data2['X']
    plot_data(X2, y2)
    rbf_clf = SVC(kernel='rbf', gamma=30)
    rbf_clf.fit(X2, y2.ravel())
    plot_svc(rbf_clf, X2)

    # Data set 3: C and gamma are chosen by dataset_3_params.
    data3 = loadmat(FileUtils.get_abs_path(__file__, "./data/ex6data3.mat"))
    X3, y3 = data3["X"], data3["y"]
    Xval, yval = data3["Xval"], data3["yval"]
    plot_data(X3, y3)
    C, gamma = dataset_3_params(X3, y3, Xval, yval)
    tuned_clf = SVC(C=C, gamma=gamma)
    tuned_clf.fit(X3, y3.ravel())
    plot_svc(tuned_clf, X3)
class YearlyAverageChangeAnalysis(object):
    """Write a 3- and 5-year change report for every CSV in ``in/annual``."""

    def __init__(self):
        """Set up the source root path and the file/math helpers."""
        self.root_path = join(expanduser("~"), "PycharmProjects",
                              "python-vistalytics", "source")
        self.file_utils = FileUtils()
        self.math_utils = MathUtils()

    def run(self):
        """Create ``<name>_Annual_Report.csv`` for each input file.

        For every row the average change and the percentage change over the
        last three and the last five columns are computed. The ``TTM``
        column, when present, is excluded first.
        """
        src_dir_path = join(self.root_path, "in", "annual")
        columns = ['Keys',
                   'Average Change Over Last 3 Years',
                   'Average Change Over Last 3 Years (%)',
                   'Average Change Over Last 5 Years',
                   'Average Change Over Last 5 Years (%)']

        for f in self.file_utils.get_files(src_dir_path):
            key_list = []
            avg_change_3_years_list = []
            percentage_change_3_years_list = []
            avg_change_5_years_list = []
            percentage_change_5_years_list = []

            data = self.file_utils.read_csv(src_dir_path, f)
            if 'TTM' in data:
                # Keyword form: the positional ``axis`` argument was removed
                # from DataFrame.drop in pandas 2.0.
                data = data.drop(columns='TTM')

            for i, v in data.iterrows():
                # Prepare columns.
                size_val = len(v)
                key_list.append(i)
                # .iloc makes the positional intent explicit; plain integer
                # keys on a labelled Series are deprecated.
                last_value = v.iloc[size_val - 1]
                avg_change_3_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 3:]))
                percentage_change_3_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 3], last_value))
                avg_change_5_years_list.append(
                    self.math_utils.average_change(v.iloc[size_val - 5:]))
                percentage_change_5_years_list.append(
                    self.math_utils.percentage_change(v.iloc[size_val - 5], last_value))

            # Write to csv.
            output_data = {
                'Keys': key_list,
                'Average Change Over Last 3 Years': avg_change_3_years_list,
                'Average Change Over Last 3 Years (%)': percentage_change_3_years_list,
                'Average Change Over Last 5 Years': avg_change_5_years_list,
                'Average Change Over Last 5 Years (%)': percentage_change_5_years_list,
            }
            output_data_frame = pd.DataFrame(output_data, columns=columns)
            output_dir_path = join(
                self.root_path, "out",
                splitext(f)[0].replace(" ", "_") + "_Annual_Report.csv")
            self.file_utils.write_csv(output_dir_path, output_data_frame)
def run():
    """Run k-means on ``ex7data2.mat`` and use it to reduce an image to 16 colours."""
    mat = loadmat(FileUtils.get_abs_path(__file__, "./data/ex7data2.mat"))
    X = mat["X"]
    K = 3

    # One assignment / update step from fixed starting centroids.
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
    idx = find_closest_centroids(X, initial_centroids)
    print("Closest centroids for the first 3 examples:\n", idx[0:3])
    centroids = compute_centroids(X, idx, K)
    print("Centroids computed after initial finding of closest centroids:\n",
          centroids)

    m, n = X.shape[0], X.shape[1]
    # Plot k-means starting from randomly initialised centroids.
    initial_centroids = init_random_centroid(X, K)
    idx = find_closest_centroids(X, initial_centroids)
    plot_kmeans(X, initial_centroids, idx, K, 10)
    plt.show()

    # Image part: every pixel becomes one 3-value row.
    image_path = FileUtils.get_abs_path(__file__, "./data/bird_small.png")
    A = plt.imread(image_path)
    A /= 255
    img_size1, img_size2, rgb = A.shape
    X2 = A.reshape(img_size1 * img_size2, 3)

    K2 = 16
    num_iters = 10
    initial_centroids2 = init_random_centroid(X2, K2)
    centroids2, idx2 = run_kmeans(X2, initial_centroids2, num_iters, K2)
    # Replace every pixel by the centroid it was assigned to.
    X2_recovered = centroids2[idx2, :].reshape(A.shape)

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))
    original_ax, compressed_ax = ax[0], ax[1]
    original_ax.imshow(A * 255)
    original_ax.set_title('Original')
    original_ax.grid(False)
    # Display compressed image, rescale back by 255
    compressed_ax.imshow(X2_recovered * 255)
    compressed_ax.set_title('Compressed, with %d colors' % K2)
    compressed_ax.grid(False)
    plt.show()
def run():
    """Run gradient descent on ``ex1data1.txt`` and plot fit and cost surface.

    Prints the initial cost, the fitted theta and the cost at ``[-1;2]``, then
    draws the fitted line over the data, a contour plot and a 3-D surface of
    the cost over a grid of theta values.
    """
    data = np.loadtxt(FileUtils.get_abs_path(__file__, "./data/ex1data1.txt"),
                      delimiter=',')
    n = np.size(data, 1)

    # The last column is the target; all other columns are features.
    targets = data[:, n - 1]
    m = np.size(targets, 0)
    features = np.reshape(data[:, range(n - 1)], [m, n - 1])
    y = np.reshape(targets, [m, 1])
    # Prepend a column of ones to the features.
    x = np.hstack([np.ones([m, 1]), features])

    theta = np.zeros([n, 1])
    alpha = 0.01
    iterations = 1500

    cost = cost_function_j(x, y, theta)
    print('Cost', cost)
    thetaRes, j_hist = gradient_descent(x, y, theta, alpha, iterations)
    print(thetaRes)
    cost = cost_function_j(x, y, str2arr('[-1;2]'))
    print(cost)

    # Evaluate the cost on a 100x100 grid of (theta0, theta1) pairs.
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)
    J_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
    for i, theta0 in enumerate(theta0_vals):
        for j, theta1 in enumerate(theta1_vals):
            J_vals[i, j] = cost_function_j(x, y, np.vstack([theta0, theta1]))

    pltData.plot_data()
    plt.plot(x[:, 1], x @ thetaRes, '-', color='red')

    contour_fig = plt.figure()
    contour_ax = contour_fig.add_subplot(111)
    contour_ax.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 20))

    surface_fig = plt.figure()
    surface_ax = surface_fig.add_subplot(111, projection='3d')
    grid0, grid1 = np.meshgrid(theta0_vals, theta1_vals)
    surface_ax.plot_surface(grid0, grid1, np.transpose(J_vals))
    plt.show()
def run():
    """Fit a Gaussian to ``ex8data1.mat`` and mark points below the threshold.

    The threshold ``epsilon`` is picked by ``select_threshold`` on the
    validation data; every point of ``X`` whose density is below it is
    circled in red.
    """
    mat = loadmat(FileUtils.get_abs_path(__file__, "./data/ex8data1.mat"))
    X, Xval, yval = mat["X"], mat["Xval"], mat["yval"]

    # Raw data.
    plt.scatter(X[:, 0], X[:, 1], marker="x")
    plt.xlim(0, 30)
    plt.ylim(0, 30)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Throughput (mb/s)")
    plt.show()

    mu, sigma2 = estimate_gaussian(X)
    p = multivariate_gaussian(X, mu, sigma2)
    visualize_fit(X, mu, sigma2)

    pval = multivariate_gaussian(Xval, mu, sigma2)
    epsilon, F1 = select_threshold(yval, pval)
    print("Best epsilon found using cross-validation:", epsilon)
    print("Best F1 on Cross Validation Set:", F1)

    # Row indices whose density is below the threshold.
    outliers = np.nonzero(p < epsilon)[0]
    plt.scatter(X[outliers, 0], X[outliers, 1],
                marker="o", facecolor="none", edgecolor="r", s=70)
    plt.xlim(0, 35)
    plt.ylim(0, 35)
    plt.xlabel("Latency (ms)")
    plt.ylabel("Throughput (mb/s)")
    plt.show()
def get_most_frequent_word_betwenn_all_commenters(
        path, most_frequent_word_per_author):
    """Collect the most frequent words used across all commenters (users).

    :param path: directory holding one comments file per commenter
    :param most_frequent_word_per_author: number of top words taken from
        each commenter
    :return: set with the union of every commenter's top words
    """
    most_frequent_words = set()
    for users_comments_file in sorted(os.listdir(path)):
        # For each author, take their top-most frequent words and add them
        # to the word set.
        comments_path = os.sep.join([path, users_comments_file])
        comments_train = FileUtils.get_list_of_comments(comments_path)
        word_list_train = WordBaseFeatureExtractor.get_word_list_frequency(
            comments_train)
        # De-duplicate the words while keeping their original order.
        unique_words = collections.OrderedDict.fromkeys(
            [word_freq[0] for word_freq in word_list_train])
        top_words = list(unique_words.keys())[:most_frequent_word_per_author]
        most_frequent_words.update(top_words)
    return most_frequent_words
def run():
    """Fit ``ex1data2.txt`` with gradient descent and with the normal equation.

    Gradient descent runs on the normalised features; the normal equation
    uses the raw features. Both fits are used to price a 1650 sq ft,
    3 bedroom house, and the cost history of gradient descent is plotted.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex1data2.txt")
    data = np.loadtxt(data_path, delimiter=',')
    n = np.size(data, 1)
    # The last column is the target; all other columns are features.
    x = data[:, range(n - 1)]
    y = data[:, n - 1]
    m = np.size(y, 0)
    x = np.reshape(x, [m, n - 1])
    y = np.reshape(y, [m, 1])
    ones = np.ones([m, 1])

    X, mu, sigma = feature_normalize(x)
    # Prepend a column of ones to both the raw and the normalised features.
    x = np.hstack([ones, x])
    X = np.hstack([ones, X])

    theta = np.zeros([n, 1])
    alpha = 0.01
    iterations = 400
    cost = cost_function_j(X, y, theta)
    print('Cost', cost)
    thetaRes, j_hist = gradient_descent(X, y, theta, alpha, iterations)
    print('Theta using gradient descent:\n', thetaRes)
    # Use 1650 sq ft so the value matches the printed message and the
    # normal-equation check below (the code previously passed 1653).
    print('Price of 1650 sq ft and 3 bedroom house: ',
          predict([[1650, 3]], thetaRes, mu, sigma))

    # Tie the x axis to ``iterations`` instead of repeating the literal 400.
    # NOTE(review): assumes gradient_descent returns one cost per iteration.
    plt.plot(range(iterations), j_hist)
    plt.xlabel("No of iterations")
    plt.ylabel("Cost")
    plt.title("Gradient Descent")
    plt.show()

    thetaRes = normal_equation(x, y)
    print('Theta using normal equation: \n', thetaRes)
    cost = thetaRes.T @ np.array([[1], [1650], [3]])
    print('Price of 1650 sq ft and 3 bedroom house: ', cost[0][0])
from utils.doregister import SignUp from utils.doUpload import Upload from utils.db_utils import DbUtils from utils.tf_utils import TfUtils from utils.imgur_utils import ImgurUtils from utils.file_utils import FileUtils #delete random lib after we add in classification import random # Initialize App w/ config app = Flask(__name__) # TODO: move all config related stuff to separate app config class #Upload = Upload() #Upload.initialize_app_upload(app) fu = FileUtils() photo_set = fu.get_upload_set() fu.initialize_app_image_storage(app, photo_set) @app.route('/') @app.route('/home') def home(): return render_template('home.html') @app.route('/login', methods=['GET', 'POST']) def login(): if request.method == 'GET': if session.get('logged_in'): #TODO need to update session info here if person goes directly to profile so we have album_id
'''
Created on 2021/07/01

@author: laurentmichel
'''
import unittest

from utils.file_utils import FileUtils
from utils.test_runner import TestRunner

# Directory holding the sample files used by the tests.
mapping_sample = FileUtils.get_datadir()


class Test(unittest.TestCase):
    """Run the OK/KO sample checks for the ``test_1`` case."""

    def testOK(self):
        """The ``test_1`` OK check must succeed."""
        outcome = TestRunner.testOK(mapping_sample, "test_1")
        self.assertTrue(outcome, "file should be valid")

    def testKO(self):
        """The ``test_1`` KO check must succeed."""
        outcome = TestRunner.testKO(mapping_sample, "test_1")
        self.assertTrue(outcome, "file shouldn't be valid")


if __name__ == "__main__":
    # import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
def main():
    """Parse the command line and convert one MKV file into an MP4 container.

    Steps, in order: configure logging, parse arguments (optionally merged
    with a configuration file), normalise the options, check tool
    dependencies and paths, read the MKV file, pick the audio/video tracks,
    extract them, re-encode the audio to AAC when needed and package
    everything with MP4Box. Temporary files are cleaned up at the end and
    after each failed stage unless ``--preserve-temp-files`` is given.

    The parsed arguments and the logger are published through the module
    globals ``args`` and ``log``.
    """
    # Main program begins
    global args, log, app_path
    log = logging.getLogger("xenonmkv")
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s [%(levelname)s] %(message)s'
    )
    console_handler.setFormatter(formatter)
    log.addHandler(console_handler)

    dependencies = ('mkvinfo', 'mediainfo', 'mkvextract',
                    'mplayer', 'faac', 'MP4Box')

    parser = argparse.ArgumentParser(description='Parse command line arguments '
                                     'for XenonMKV.')
    parser.add_argument('source_file', help='Path to the source MKV file')
    parser.add_argument('-d', '--destination',
                        help="Directory to output the destination .mp4 file "
                             "(default: current directory)",
                        default='.')
    parser.add_argument('-sd', '--scratch-dir',
                        help="Specify a scratch directory where temporary "
                             "files should be stored",
                        default=None)
    parser.add_argument('-cfg', '--config-file',
                        help="Provide a configuration file that contains "
                             "default arguments or settings for the "
                             "application",
                        default='')
    parser.add_argument("-p", '--profile',
                        help="Select a standardized device profile for "
                             "encoding. Current profile options are: "
                             "xbox360, playbook",
                        default="")

    output_group = parser.add_argument_group("Output options")
    output_group.add_argument('-q', '--quiet',
                              help="Do not display output or progress from "
                                   "tools, or prompt for input",
                              action='store_true')
    output_group.add_argument('-v', '--verbose', help='Verbose output',
                              action='store_true')
    output_group.add_argument('-vv', '--debug',
                              help='Highly verbose debug output',
                              action='store_true')
    output_group.add_argument('-pf', '--print-file',
                              help='Output filenames before and after converting',
                              action='store_true')

    video_group = parser.add_argument_group("Video options",
                                            "Options for processing video.")
    video_group.add_argument('-nrp', '--no-round-par',
                             help="When processing video, do not round pixel "
                                  "aspect ratio from 0.98 to 1.01 to 1:1.",
                             action='store_true')
    video_group.add_argument('-irf', '--ignore-reference-frames',
                             help="If the source video has too many reference "
                                  "frames to play on low-powered devices "
                                  "(Xbox, PlayBook), continue converting anyway",
                             action='store_true')

    audio_group = parser.add_argument_group("Audio options",
                                            "Select custom audio decoding and "
                                            "encoding options.")
    audio_group.add_argument('-c', '--channels',
                             help="Specify the maximum number of channels "
                                  "that are acceptable in the output file. "
                                  "Certain devices (Xbox) will not play audio "
                                  "with more than two channels. If the audio "
                                  "needs to be re-encoded at all, it will be "
                                  "downmixed to two channels only. Possible "
                                  "values for this option are 2 (stereo); "
                                  "4 (surround); 5.1 or 6 (full 5.1); "
                                  "7.1 or 8 (full 7.1 audio). For more "
                                  "details, view the README file.",
                             default=6)
    audio_group.add_argument('-fq', '--faac-quality',
                             help="Quality setting for FAAC when encoding WAV "
                                  "files to AAC. Defaults to 150 (see "
                                  "http://wiki.hydrogenaudio.org/index.php?title=FAAC)",
                             default=150)

    track_group = parser.add_argument_group("Track options",
                                            "These options determine how multiple tracks "
                                            "in MKV files are selected.")
    track_group.add_argument('-st', '--select-tracks',
                             help="If there are multiple tracks in the MKV "
                                  "file, prompt to select which ones will be "
                                  "used. By default, the last video and audio "
                                  "tracks flagged as 'default' in the MKV "
                                  "file will be used. This option requires "
                                  "interactive user input, so do not use it "
                                  "in batch processing or scripts.",
                             action='store_true')
    track_group.add_argument('-vt', '--video-track',
                             help="Use the specified video track. If not "
                                  "present in the file, the default track "
                                  "will be used.",
                             type=int)
    track_group.add_argument('-at', '--audio-track',
                             help="Use the specified audio track. If not "
                                  "present in the file, the default track "
                                  "will be used.",
                             type=int)
    track_group.add_argument('-lang', '--preferred-language',
                             help="Provide a preferred language code in ISO "
                                  "639-1 format ('en' for English, 'fr' for "
                                  "French, etc.) When picking tracks, this "
                                  "language will be preferred.")

    proc_group = parser.add_argument_group("File and processing options",
                                           "These options determine how "
                                           "XenonMKV processes files and "
                                           "their contents.")
    proc_group.add_argument('-rp', '--resume-previous',
                            help="Resume a previous run (do not recreate "
                                 "files if they already exist). Useful for "
                                 "debugging quickly if a conversion has "
                                 "already partially succeeded.",
                            action='store_true')
    proc_group.add_argument('-n', '--name',
                            help="Specify a name for the final MP4 container. "
                                 "Defaults to the original file name.",
                            default="")
    proc_group.add_argument('-tn', '--tag-name',
                            help="Specify a tag name for the final MP4 "
                                 "container. Defaults to the original file "
                                 "name.",
                            default="")
    # Help text corrected: this option sets the tag comment, not the tag name.
    proc_group.add_argument('-tc', '--tag-comment',
                            help="Specify a tag comment for the final MP4 "
                                 "container.",
                            default="")
    proc_group.add_argument('-preserve', '--preserve-temp-files',
                            help="Preserve temporary files on the filesystem "
                                 "rather than deleting them at the end of "
                                 "each run.",
                            action='store_true', default=False)
    proc_group.add_argument("-eS", "--error-filesize",
                            help="Stop processing this file if it is over "
                                 "4GiB. Files of this size will not be "
                                 "processed correctly by some devices such as "
                                 "the Xbox 360, and they will not save "
                                 "correctly to FAT32-formatted storage. By "
                                 "default, you will only see a warning "
                                 "message, and processing will continue.",
                            action="store_true")
    proc_group.add_argument('--mp4box-retries',
                            help="Set the number of retry attempts for MP4Box "
                                 "to attempt to create a file (default: 3)",
                            default=3, type=int)

    dep_group = parser.add_argument_group("Custom paths",
                                          "Set custom paths for the utilities used by "
                                          "XenonMKV.")
    for dependency in dependencies:
        dep_group.add_argument("--{0}-path".format(dependency.lower()),
                               help="Set a custom complete path for the {0} "
                                    "tool. Any library under that path will "
                                    "also be loaded.".format(dependency))

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    config_file_output = False
    # If a configuration file was specified, attempt to read it.
    if args.config_file and os.path.isfile(args.config_file):
        config_file_output = parse_config_file(args)

    # Depending on the arguments, set the logging level appropriately.
    if args.quiet:
        log.setLevel(logging.ERROR)
    elif args.debug:
        log.setLevel(logging.DEBUG)
        log.debug("Using debug/highly verbose mode output")
    elif args.verbose:
        log.setLevel(logging.INFO)

    # If we parsed a configuration file, run through all logging output
    if config_file_output:
        for level, message in config_file_output:
            getattr(log, level)(message)

    # Pick temporary/scratch directory
    if not args.scratch_dir:
        if "TEMP" in os.environ:
            args.scratch_dir = os.environ["TEMP"]
        elif os.path.isdir("/var/tmp"):
            args.scratch_dir = "/var/tmp"
        else:
            args.scratch_dir = os.curdir

    # Apply selected profile
    if args.profile:
        args.channels = 2
        args.error_filesize = True

    # Check for 5.1/7.1 audio with the channels setting
    if args.channels == "5.1":
        args.channels = 6
    elif args.channels == "7.1":
        args.channels = 8
    if args.channels not in ('2', '4', '6', '8', 2, 4, 6, 8):
        log.warning("An invalid number of channels was specified. "
                    "Falling back to 2-channel stereo audio.")
        args.channels = 2

    # Enforce channels as integer for comparison purposes later on
    args.channels = int(args.channels)

    # Ensure preferred language, if present, is lowercased and 2 characters
    if args.preferred_language:
        args.preferred_language = args.preferred_language.lower()
        if len(args.preferred_language) < 2:
            log.warning("Could not set preferred language code '{0}'".format(
                args.preferred_language))
            args.preferred_language = None
        elif len(args.preferred_language) > 2:
            args.preferred_language = args.preferred_language[0:2]
            log.warning("Preferred language code truncated to '{0}'".format(
                args.preferred_language))

    # Make sure user is not prompted for input if quiet option is used
    if args.quiet and args.select_tracks:
        log.warning("Cannot use interactive track selection in quiet mode. "
                    "Tracks will be automatically selected.")
        args.select_tracks = False

    log.debug("Starting XenonMKV")

    # Check if we have a full file path or are just specifying a file
    if os.sep not in args.source_file:
        log.debug("Ensuring that we have a complete path to {0}".format(
            args.source_file))
        args.source_file = os.path.join(os.getcwd(), args.source_file)
        log.debug("{0} will be used to reference the original MKV file".format(
            args.source_file))

    # Always ensure destination path ends with a slash
    if not args.destination.endswith(os.sep):
        args.destination += os.sep

    if not args.scratch_dir.endswith(os.sep):
        args.scratch_dir += os.sep

    # Initialize file utilities
    f_utils = FileUtils(log, args)

    # Check if all dependent applications are installed and available in PATH,
    # or if they are specified.
    # If so, store them in args.tool_paths so all classes
    # have access to them as needed
    (args.tool_paths, args.library_paths) = f_utils.check_dependencies(
        dependencies
    )

    # Check if source file exists and is an appropriate size
    try:
        f_utils.check_source_file(args.source_file)
    except IOError as e:
        log_exception("check_source_file", e)

    source_basename = os.path.basename(args.source_file)
    log.debug("Source Basename: {0}".format(source_basename))
    source_noext = source_basename

    if not args.name:
        args.name = source_noext
        log.debug("Using '{0}' as final container name".format(args.name))

    if not args.tag_name:
        args.tag_name = source_noext
        log.debug("Using '{0}' as tag name".format(args.tag_name))

    # Check if destination directory exists
    try:
        f_utils.check_dest_dir(args.destination)
    except IOError as e:
        log_exception("check_dest_dir", e)

    log.info("Loading source file {0}".format(args.source_file))

    if args.print_file:
        # Function-call form: the print statement is a syntax error on
        # Python 3, while a single parenthesised argument prints the same
        # text on Python 2.
        print("Processing: {0}".format(args.source_file))

    try:
        to_convert = MKVFile(args.source_file, log, args)
        to_convert.get_mkvinfo()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("get_mkvinfo", e)

    # If the user knows which A/V tracks they want, set them.
    # MKVFile will not overwrite them.
    try_set_video_track(to_convert)
    try_set_audio_track(to_convert)

    try:
        # Check for multiple tracks
        if to_convert.has_multiple_av_tracks():
            log.debug("Source file {0} has multiple audio or "
                      "video tracks".format(args.source_file))

            # First, pick default tracks,
            # which can be overridden in select_tracks
            to_convert.set_default_av_tracks()

            if args.select_tracks:
                video_tracks = to_convert.video_track_list()
                audio_tracks = to_convert.audio_track_list()
                if len(video_tracks) > 1:
                    args.video_track = select_track("video", video_tracks)
                    try_set_video_track(to_convert)
                if len(audio_tracks) > 1:
                    args.audio_track = select_track("audio", audio_tracks)
                    try_set_audio_track(to_convert)
            else:
                log.debug("Selected default audio and video tracks")
        else:
            # Pick default (or only) audio/video tracks
            log.debug("Source file {0} has 1 audio and 1 video track; "
                      "using these".format(args.source_file))
            to_convert.set_default_av_tracks()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("set_default_av_tracks", e)

    # Next phase: Extract MKV files to scratch directory
    try:
        (video_file, audio_file) = to_convert.extract_mkv()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("extract_mkv", e)

    # If needed, hex edit the video file to make it compliant
    # with a lower h264 profile level
    if video_file.endswith(".h264"):
        f_utils.hex_edit_video_file(video_file)

    # Detect which audio codec is in place and dump audio to WAV accordingly
    if not audio_file.endswith(".aac"):
        log.debug("Audio track {0} needs to be re-encoded".format(audio_file))
        # use the appropriate AAC encoder to transform it to .aac
        enc = AACEncoder(args.scratch_dir, log, args)
        enc.encode()
        encoded_audio = os.path.join(args.scratch_dir, "audiodump.aac")
    else:
        # The audio track does not need to be re-encoded.
        # Reference the already-valid audio file and put it into the MP4 container.
        log.debug("Audio track {0} does not needs to be re-encoded".format(audio_file))
        encoded_audio = audio_file

    # Now, throw things back together into a .mp4 container with MP4Box.
    video_track = to_convert.get_video_track()
    mp4box = MP4Box(video_file, encoded_audio, video_track.frame_rate,
                    video_track.pixel_ar, args, log)
    try:
        mp4box.package()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("package", e)

    # Move the file to the destination directory with the original name
    dest_path = os.path.join(args.destination, source_noext + ".mp4")

    log.info("Processing of {0} complete; file saved as {1}".format(
        args.source_file, dest_path))

    # Delete temporary files if possible
    if not args.preserve_temp_files:
        cleanup_temp_files()

    log.debug("XenonMKV completed processing")

    if args.print_file:
        print("Completed: {0}".format(dest_path))
from utils.file_utils import FileUtils
from utils.general_utils import GeneralUtils
from pandas import DataFrame

# Script: validate the Persian swear-word list, then export it sorted.
#
# 1. Load the first column of the source workbook as a list of words.
# 2. Print every (word, character) pair whose character is outside the
#    whitelist below, so stray characters can be spotted by eye.
# 3. Sort the words with the project's Persian-aware sorter and write them
#    out as a single-column sheet.

# Characters accepted inside a word (letters listed below plus the space).
_ALLOWED_CHARS = "آابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیيى "

df = FileUtils.read_excel_file('resources/persian-swear-words.xlsx')
swear_words = df.iloc[:, 0].tolist()

for word in swear_words:
    for char in word:
        if char in _ALLOWED_CHARS:
            continue
        # Report the offending word together with the unexpected character.
        print(word, ',', char)

# The writer is handed one single-element list per word (one row each).
words_in_order = GeneralUtils.sort_lis_in_persian(swear_words)
sorted_lst = [[entry] for entry in words_in_order]
FileUtils.write_lists2excel_file(sorted_lst, 'resources/test.xlsx', ['واژه'])
def main():
    """Run the XenonMKV command-line conversion of one MKV file to MP4.

    Stages, in order:

    1. Configure the ``xenonmkv`` logger with a console handler.
    2. Build the argument parser, parse ``sys.argv`` and, when a
       configuration file is given and exists, merge it via
       ``parse_config_file``.
    3. Normalize the parsed options (log level, scratch directory, profile,
       channel count, preferred language, quiet vs. interactive selection,
       path separators).
    4. Check the external tools, the source file and the destination
       directory through ``FileUtils``.
    5. Load the MKV, choose audio/video tracks, extract them, re-encode the
       audio when the extracted file is not ``.aac``, and package the
       result with ``MP4Box``.
    6. Remove temporary files unless ``--preserve-temp-files`` was given.

    ``args`` and ``log`` are published as module globals because helper
    functions in this module (for example ``cleanup_temp_files``) read them.

    Exits with status 1, after printing the help text, when no command-line
    arguments were supplied.

    NOTE(review): every ``except`` branch below hands the error to
    ``log_exception`` and then falls through; the code that follows relies
    on names bound inside the ``try``. This presumably means
    ``log_exception`` terminates the process -- confirm.
    """
    # Main program begins
    global args, log, app_path

    # ----- Logging ---------------------------------------------------------
    log = logging.getLogger("xenonmkv")
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s [%(levelname)s] %(message)s')
    console_handler.setFormatter(formatter)
    log.addHandler(console_handler)

    # External tools; each one also gets a "--<tool>-path" option below.
    dependencies = ('mkvinfo', 'mediainfo', 'mkvextract', 'mplayer', 'faac',
                    'MP4Box')

    # ----- Argument parser -------------------------------------------------
    # The parser uses argparse's default help formatter, which re-wraps help
    # text, so line breaks inside the triple-quoted strings are not
    # significant.
    parser = argparse.ArgumentParser(
        description='Parse command line arguments '
                    'for XenonMKV.')
    parser.add_argument('source_file', help='Path to the source MKV file')
    parser.add_argument('-d', '--destination',
                        help="""Directory to output the destination .mp4 file
                        (default: current directory)""",
                        default='.')
    parser.add_argument(
        '-sd', '--scratch-dir',
        help="""Specify a scratch directory where temporary files should
        be stored""",
        default=None)
    parser.add_argument('-cfg', '--config-file',
                        help="""Provide a configuration file that
                        contains default arguments or settings for
                        the application""",
                        default='')
    parser.add_argument(
        "-p", '--profile',
        help="""Select a standardized device profile for encoding.
        Current profile options are: xbox360, playbook""",
        default="")

    output_group = parser.add_argument_group("Output options")
    output_group.add_argument(
        '-q', '--quiet',
        help="""Do not display output or progress from tools,
        or prompt for input""",
        action='store_true')
    output_group.add_argument('-v', '--verbose', help='Verbose output',
                              action='store_true')
    output_group.add_argument('-vv', '--debug',
                              help='Highly verbose debug output',
                              action='store_true')
    output_group.add_argument(
        '-pf', '--print-file',
        help='Output filenames before and after converting',
        action='store_true')

    video_group = parser.add_argument_group("Video options",
                                            "Options for processing video.")
    video_group.add_argument(
        '-nrp', '--no-round-par',
        help="""When processing video, do not round pixel aspect ratio
        from 0.98 to 1.01 to 1:1.""",
        action='store_true')
    video_group.add_argument(
        '-irf', '--ignore-reference-frames',
        help="""If the source video has too many reference frames to play
        on low-powered devices (Xbox, PlayBook), continue converting
        anyway""",
        action='store_true')

    audio_group = parser.add_argument_group(
        "Audio options", "Select custom audio decoding and "
                         "encoding options.")
    # No ``type=`` here on purpose: "5.1" and "7.1" are accepted as strings
    # and mapped to integers after parsing.
    audio_group.add_argument(
        '-c', '--channels',
        help="""Specify the maximum number of channels that are
        acceptable in the output file. Certain devices (Xbox) will not
        play audio with more than two channels. If the audio needs to be
        re-encoded at all, it will be downmixed to two channels only.
        Possible values for this option are 2 (stereo); 4 (surround);
        5.1 or 6 (full 5.1); 7.1 or 8 (full 7.1 audio).
        For more details, view the README file.""",
        default=6)
    audio_group.add_argument(
        '-fq', '--faac-quality',
        help="""Quality setting for FAAC when encoding WAV files to AAC.
        Defaults to 150
        (see http://wiki.hydrogenaudio.org/index.php?title=FAAC)""",
        default=150)

    track_group = parser.add_argument_group(
        "Track options", "These options determine how multiple tracks "
                         "in MKV files are selected.")
    track_group.add_argument(
        '-st', '--select-tracks',
        help="""If there are multiple tracks in the MKV file, prompt to
        select which ones will be used. By default, the last video and
        audio tracks flagged as 'default' in the MKV file will be used.
        This option requires interactive user input, so do not use it in
        batch processing or scripts.""",
        action='store_true')
    track_group.add_argument(
        '-vt', '--video-track',
        help="""Use the specified video track. If not present in the file,
        the default track will be used.""",
        type=int)
    track_group.add_argument(
        '-at', '--audio-track',
        help="""Use the specified audio track. If not present in the file,
        the default track will be used.""",
        type=int)
    track_group.add_argument(
        '-lang', '--preferred-language',
        help="""Provide a preferred language code in ISO 639-1 format
        ('en' for English, 'fr' for French, etc.)
        When picking tracks, this language will be preferred."""
    )

    proc_group = parser.add_argument_group(
        "File and processing options",
        """These options determine how XenonMKV processes files
        and their contents."""
    )
    proc_group.add_argument(
        '-rp', '--resume-previous',
        help="""Resume a previous run (do not recreate files if they
        already exist). Useful for debugging quickly if a conversion has
        already partially succeeded.""",
        action='store_true')
    proc_group.add_argument('-n', '--name',
                            help="""Specify a name for the final MP4
                            container. Defaults to the original file
                            name.""",
                            default="")
    proc_group.add_argument(
        '-tn', '--tag-name',
        help="""Specify a tag name for the final MP4 container.
        Defaults to the original file name.""",
        default="")
    # Help text previously repeated the --tag-name wording.
    proc_group.add_argument(
        '-tc', '--tag-comment',
        help="""Specify a tag comment for the final MP4 container.""",
        default="")
    proc_group.add_argument(
        '-preserve', '--preserve-temp-files',
        help="""Preserve temporary files on the filesystem rather than
        deleting them at the end of each run.""",
        action='store_true', default=False)
    proc_group.add_argument(
        "-eS", "--error-filesize",
        help="""Stop processing this file if it is over 4GiB. Files of this
        size will not be processed correctly by some devices such as the
        Xbox 360, and they will not save correctly to FAT32-formatted
        storage. By default, you will only see a warning message, and
        processing will continue.""",
        action="store_true")
    proc_group.add_argument(
        '--mp4box-retries',
        help="""Set the number of retry attempts for MP4Box to attempt to
        create a file (default: 3)""",
        default=3, type=int)

    dep_group = parser.add_argument_group(
        "Custom paths", "Set custom paths for the utilities used by "
                        "XenonMKV.")
    for dependency in dependencies:
        dep_group.add_argument(
            "--{0}-path".format(dependency.lower()),
            help="""Set a custom complete path for the {0} tool.
            Any library under that path will also be
            loaded.""".format(dependency))

    # With no arguments at all, show usage instead of an argparse error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    # ----- Configuration file and log level --------------------------------
    config_file_output = False
    # If a configuration file was specified, attempt to read it.
    if args.config_file and os.path.isfile(args.config_file):
        config_file_output = parse_config_file(args)

    # Depending on the arguments, set the logging level appropriately.
    if args.quiet:
        log.setLevel(logging.ERROR)
    elif args.debug:
        log.setLevel(logging.DEBUG)
        log.debug("Using debug/highly verbose mode output")
    elif args.verbose:
        log.setLevel(logging.INFO)

    # If we parsed a configuration file, run through all logging output.
    # It is replayed only now so the level chosen above applies; each entry
    # is a (logger method name, message) pair.
    if config_file_output:
        for level, message in config_file_output:
            getattr(log, level)(message)

    # ----- Option normalization --------------------------------------------
    # Pick temporary/scratch directory
    if not args.scratch_dir:
        if "TEMP" in os.environ:
            args.scratch_dir = os.environ["TEMP"]
        elif os.path.isdir("/var/tmp"):
            args.scratch_dir = "/var/tmp"
        else:
            args.scratch_dir = os.curdir

    # Apply selected profile (any profile forces stereo and the size check)
    if args.profile:
        args.channels = 2
        args.error_filesize = True

    # Check for 5.1/7.1 audio with the channels setting
    if args.channels == "5.1":
        args.channels = 6
    elif args.channels == "7.1":
        args.channels = 8
    # Both str and int forms are listed: the command line yields strings,
    # the default and the profile yield integers.
    if args.channels not in ('2', '4', '6', '8', 2, 4, 6, 8):
        log.warning("An invalid number of channels was specified. "
                    "Falling back to 2-channel stereo audio.")
        args.channels = 2

    # Enforce channels as integer for comparison purposes later on
    args.channels = int(args.channels)

    # Ensure preferred language, if present, is lowercased and 2 characters
    if args.preferred_language:
        args.preferred_language = args.preferred_language.lower()
        if len(args.preferred_language) < 2:
            log.warning("Could not set preferred language code '{0}'".format(
                args.preferred_language))
            args.preferred_language = None
        elif len(args.preferred_language) > 2:
            args.preferred_language = args.preferred_language[0:2]
            log.warning("Preferred language code truncated to '{0}'".format(
                args.preferred_language))

    # Make sure user is not prompted for input if quiet option is used
    if args.quiet and args.select_tracks:
        log.warning("Cannot use interactive track selection in quiet mode. "
                    "Tracks will be automatically selected.")
        args.select_tracks = False

    log.debug("Starting XenonMKV")

    # Check if we have a full file path or are just specifying a file
    if os.sep not in args.source_file:
        log.debug("Ensuring that we have a complete path to {0}".format(
            args.source_file))
        args.source_file = os.path.join(os.getcwd(), args.source_file)
        log.debug("{0} will be used to reference the original MKV file".format(
            args.source_file))

    # Always ensure destination path ends with a slash
    if not args.destination.endswith(os.sep):
        args.destination += os.sep

    if not args.scratch_dir.endswith(os.sep):
        args.scratch_dir += os.sep

    # ----- Environment checks ----------------------------------------------
    # Initialize file utilities
    f_utils = FileUtils(log, args)

    # Check if all dependent applications are installed and available in PATH,
    # or if they are specified.
    # If so, store them in args.tool_paths so all classes
    # have access to them as needed
    (args.tool_paths,
     args.library_paths) = f_utils.check_dependencies(dependencies)

    # Check if source file exists and is an appropriate size
    try:
        f_utils.check_source_file(args.source_file)
    except IOError as e:
        log_exception("check_source_file", e)

    source_basename = os.path.basename(args.source_file)
    log.debug("Source Basename: {0}".format(source_basename))
    # NOTE(review): despite the name, the extension is not stripped here, so
    # the defaults below and dest_path keep the source extension -- confirm
    # this is intended.
    source_noext = source_basename

    if not args.name:
        args.name = source_noext
        log.debug("Using '{0}' as final container name".format(args.name))

    if not args.tag_name:
        args.tag_name = source_noext
        log.debug("Using '{0}' as tag name".format(args.tag_name))

    # Check if destination directory exists
    try:
        f_utils.check_dest_dir(args.destination)
    except IOError as e:
        log_exception("check_dest_dir", e)

    # ----- Load the MKV and select tracks ----------------------------------
    log.info("Loading source file {0}".format(args.source_file))

    if args.print_file:
        # Single-argument parenthesised form: identical output on
        # Python 2 and Python 3.
        print("Processing: {0}".format(args.source_file))

    try:
        to_convert = MKVFile(args.source_file, log, args)
        to_convert.get_mkvinfo()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("get_mkvinfo", e)

    # If the user knows which A/V tracks they want, set them.
    # MKVFile will not overwrite them.
    try_set_video_track(to_convert)
    try_set_audio_track(to_convert)

    try:
        # Check for multiple tracks
        if to_convert.has_multiple_av_tracks():
            log.debug("Source file {0} has multiple audio or "
                      "video tracks".format(args.source_file))

            # First, pick default tracks,
            # which can be overridden in select_tracks
            to_convert.set_default_av_tracks()

            if args.select_tracks:
                video_tracks = to_convert.video_track_list()
                audio_tracks = to_convert.audio_track_list()
                # Only prompt for a kind that actually offers a choice.
                if len(video_tracks) > 1:
                    args.video_track = select_track("video", video_tracks)
                    try_set_video_track(to_convert)
                if len(audio_tracks) > 1:
                    args.audio_track = select_track("audio", audio_tracks)
                    try_set_audio_track(to_convert)
            else:
                log.debug("Selected default audio and video tracks")

        else:
            # Pick default (or only) audio/video tracks
            log.debug("Source file {0} has 1 audio and 1 video track; "
                      "using these".format(args.source_file))
            to_convert.set_default_av_tracks()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("set_default_av_tracks", e)

    # ----- Extract, re-encode, package -------------------------------------
    # Next phase: Extract MKV files to scratch directory
    try:
        (video_file, audio_file) = to_convert.extract_mkv()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("extract_mkv", e)

    # If needed, hex edit the video file to make it compliant
    # with a lower h264 profile level
    if video_file.endswith(".h264"):
        f_utils.hex_edit_video_file(video_file)

    # Detect which audio codec is in place and dump audio to WAV accordingly
    if not audio_file.endswith(".aac"):
        log.debug("Audio track {0} needs to be re-encoded".format(audio_file))

        # use the appropriate AAC encoder to transform it to .aac
        enc = AACEncoder(args.scratch_dir, log, args)
        enc.encode()
        encoded_audio = os.path.join(args.scratch_dir, "audiodump.aac")
    else:
        # The audio track does not need to be re-encoded.
        # Reference the already-valid audio file and put it into the MP4
        # container.
        log.debug("Audio track {0} does not needs to be re-encoded".format(
            audio_file))
        encoded_audio = audio_file

    # Now, throw things back together into a .mp4 container with MP4Box.
    video_track = to_convert.get_video_track()
    mp4box = MP4Box(video_file, encoded_audio, video_track.frame_rate,
                    video_track.pixel_ar, args, log)
    try:
        mp4box.package()
    except Exception as e:
        if not args.preserve_temp_files:
            cleanup_temp_files()
        log_exception("package", e)

    # Move the file to the destination directory with the original name
    dest_path = os.path.join(args.destination, source_noext + ".mp4")

    log.info("Processing of {0} complete; file saved as {1}".format(
        args.source_file, dest_path))

    # Delete temporary files if possible
    if not args.preserve_temp_files:
        cleanup_temp_files()

    log.debug("XenonMKV completed processing")
    if args.print_file:
        print("Completed: {0}".format(dest_path))
def __init__(self):
    """Set the project source root and create the helper objects.

    ``root_path`` is ``<home>/PycharmProjects/python-vistalytics/source``,
    built from the current user's home directory.
    """
    home_dir = expanduser('~')
    project_parts = ("PycharmProjects", "python-vistalytics", "source")
    self.root_path = join(home_dir, *project_parts)

    # Helper objects kept on the instance.
    self.math_utils = MathUtils()
    self.file_utils = FileUtils()
# Initialization script: resolves the shared directory locations and sets up
# the shared logger. Everything below runs once, at import time.
# NOTE(review): the log message suggests this is the ``utils`` package
# initializer -- confirm.
import sys, os
from utils.logger_setup import LoggerSetup
from utils.file_utils import FileUtils

# Directory locations, looked up through FileUtils and exposed as
# module-level names.
data_dir = FileUtils.get_datadir()
project_dir = FileUtils.get_projectdir()
schema_dir = FileUtils.get_schemadir()

# Shared logger. The level is set before the first message is emitted;
# presumably set_info_level() selects INFO so the line below is shown -- confirm.
logger = LoggerSetup.get_logger()
LoggerSetup.set_info_level()
logger.info("utils package intialized")
def run():
    """Run the movie-recommender (collaborative filtering) demo end to end.

    Steps:

    1. Load the ratings (``Y``, ``R``) and pre-trained parameters
       (``X``, ``Theta``) from the ``.mat`` files under ``./data``.
    2. Print the average rating of the first movie and evaluate
       ``cofi_cost_function`` on a small slice, without and with
       regularization (lambda = 1.5).
    3. Add a hand-written ratings column for a new user, normalize the
       ratings and fit new parameters with ``scipy.optimize.minimize``
       (TNC, at most 100 iterations, lambda = 10).
    4. Print the ten highest predicted ratings for the new user.

    The fit starts from ``np.random.randn`` values, so the printed
    recommendations vary between runs.

    Returns:
        None. All results are printed.
    """
    data_path = FileUtils.get_abs_path(__file__, "./data/ex8_movies.mat")
    mat3 = loadmat(data_path)
    data_path = FileUtils.get_abs_path(__file__, "./data/ex8_movieParams.mat")
    mat4 = loadmat(data_path)

    # Y: 1682 X 943 matrix, containing ratings (1-5) of 1682 movies on 943 user
    Y = mat3["Y"]
    # R: 1682 X 943 matrix, where R(i,j) = 1 if and only if user j give
    # rating to movie i
    R = mat3["R"]
    # X: 1682 X 10 matrix, num_movies X num_features matrix of movie features
    X = mat4["X"]
    # Theta: 943 X 10 matrix, num_users X num_features matrix of user features
    Theta = mat4["Theta"]

    # Compute average rating
    print("Average rating for movie 1 (Toy Story):",
          np.sum(Y[0, :] * R[0, :]) / np.sum(R[0, :]), "/5")

    # Reduce the data set size to run faster
    num_users, num_movies, num_features = 4, 5, 3
    X_test = X[:num_movies, :num_features]
    Theta_test = Theta[:num_users, :num_features]
    Y_test = Y[:num_movies, :num_users]
    R_test = R[:num_movies, :num_users]
    params = np.append(X_test.flatten(), Theta_test.flatten())

    # Evaluate cost function (gradients are returned but not needed here)
    J, _ = cofi_cost_function(params, Y_test, R_test, num_users, num_movies,
                              num_features, 0)
    print("Cost at loaded parameters:", J)

    J2, _ = cofi_cost_function(params, Y_test, R_test, num_users, num_movies,
                               num_features, 1.5)
    print("Cost at loaded parameters (lambda = 1.5):", J2)

    # load movie list
    data_path = FileUtils.get_abs_path(__file__, "./data/movie_ids.txt")
    # Context manager so the file handle is closed deterministically.
    with open(data_path, "r") as movie_file:
        # [:-1] drops the empty string left after the final newline.
        movie_list = movie_file.read().split("\n")[:-1]

    # Initialize my ratings: one row per movie, sized from Y rather than a
    # hard-coded count so the column always stacks onto Y.
    my_ratings = np.zeros((Y.shape[0], 1))

    # Create own ratings
    my_ratings[0] = 4
    my_ratings[97] = 2
    my_ratings[6] = 3
    my_ratings[11] = 5
    my_ratings[53] = 4
    my_ratings[63] = 5
    my_ratings[65] = 3
    my_ratings[68] = 5
    my_ratings[82] = 4
    my_ratings[225] = 5
    my_ratings[354] = 5

    print("New user ratings:\n")
    # Index the scalar with [i, 0]: converting a 1-element array with int()
    # is deprecated in recent NumPy releases.
    for i in np.flatnonzero(my_ratings[:, 0] > 0):
        print("Rated", int(my_ratings[i, 0]), "for index", movie_list[i])

    # The new user becomes column 0 of both matrices.
    Y = np.hstack((my_ratings, Y))
    R = np.hstack((my_ratings != 0, R))

    # Normalize Ratings
    Ynorm, Ymean = normalize_ratings(Y, R)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = 10

    # Set initial Parameters (Theta,X)
    X = np.random.randn(num_movies, num_features)
    Theta = np.random.randn(num_users, num_features)
    initial_parameters = np.append(X.flatten(), Theta.flatten())
    reg_lambda = 10

    options = {'maxiter': 100}
    # jac=True: cofi_cost_function returns (cost, gradient) in one call.
    result = op.minimize(fun=cofi_cost_function,
                         x0=initial_parameters,
                         args=(Ynorm, R, num_users, num_movies, num_features,
                               reg_lambda),
                         method='TNC',
                         jac=True,
                         options=options)

    # Unroll the flat solution back into the two parameter matrices.
    params_final = result.x
    X = params_final[0:num_movies * num_features].reshape(
        num_movies, num_features)
    Theta = params_final[num_movies * num_features:].reshape(
        num_users, num_features)

    # Predicted ratings for every (movie, user) pair; column 0 is the new
    # user. The mean removed during normalization is added back.
    # assumes Ymean is (num_movies, 1) -- TODO confirm against
    # normalize_ratings
    p = X @ Theta.T
    my_predictions = p[:, 0][:, np.newaxis] + Ymean

    # Keep the predictions numeric. Stacking them with the titles in one
    # array turns them into strings, and the sort below would then order
    # them lexicographically ("10.3" below "9.8").
    df = pd.DataFrame({0: my_predictions[:, 0], 1: movie_list})
    df.sort_values(by=[0], ascending=False, inplace=True)
    df.reset_index(drop=True, inplace=True)

    print("Top recommendations for you:\n")
    for i in range(min(10, len(df))):
        print("Predicting rating", round(float(df[0][i]), 1), " for index",
              df[1][i])