def main():
    files = [x for x in helper.get_files('.') if x.endswith('_complete.zip')]
    num_cores = multiprocessing.cpu_count()
    Parallel(n_jobs=num_cores)(delayed(process_file)(f) for f in files)
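# Several of these snippets rely on a project-local helper.get_files() whose
# implementation is not shown. A minimal sketch of what such a helper might
# look like (hypothetical, assuming it walks a directory tree and optionally
# filters by file extension):
import os

def get_files(root_dir, extension=None):
    """Recursively collect file paths under root_dir, optionally filtered by extension."""
    matches = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if extension is None or filename.endswith(extension):
                matches.append(os.path.join(dirpath, filename))
    return matches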
def main():
    files = helper.get_files('customer review data')
    for f in files:
        reviews = process_file(f)
        generate_data(f, reviews)
        # Only the first file is processed; the loop exits after one iteration.
        break
def process_dir(data_dir):
    """
    Processes a directory containing a set of case documents and generates the
    citation data. The citation data thus generated shall be stored in {data_dir}.txt.
    """
    case_files = helper.get_files(data_dir)
    ops = parse_files(case_files)

    # Write to file
    np.savetxt(data_dir + '.txt', ops, fmt='%s')
def process_dir(data_dir, b_size=None):
    """
    Processes a directory containing a set of case documents and extracts the
    case data. The case data thus extracted shall be stored in {data_dir}/txt/.
    """
    case_files = helper.get_files(data_dir)
    if b_size is not None:
        case_files = case_files[:b_size]

    target_path = os.path.join(data_dir, 'txt')
    write_files(parse_files(case_files), target_path)
    return target_path
def main():
    # Arg handling
    if len(sys.argv) == 4:
        # Give TopK a default value
        TopK = 10
    elif len(sys.argv) == 5:
        TopK = sys.argv[4]  # soft clustering
    else:
        usage()

    pathToMfcc = os.path.abspath(sys.argv[1])     # root path under which .mfcc are present
    kmeansCenters = os.path.abspath(sys.argv[2])  # kmeans centers
    outpath = os.path.abspath(sys.argv[3])        # dir for the output

    # Find out the number of kmeans centers by counting the number of rows
    clusterCount = str(len(open(kmeansCenters).readlines()))

    # Get all the mfcc files
    print("Searching for " + os.environ["FORMAT"] + " files")
    mfccFiles = helper.get_files(pathToMfcc, os.environ["FORMAT"])

    count = 1
    name = os.path.join(outpath, "run_0.sh")
    script = open(name, "w")
    for mfcc in mfccFiles:
        if count % SPLIT == 0:
            # Close the current batch script, submit it, and start a new one
            script.close()
            qsub(name)
            name = os.path.join(outpath, "run_" + str(count // SPLIT) + ".sh")
            script = open(name, "w")

        # Create the new txyc file
        basename = os.path.basename(mfcc)
        txycFile = os.path.join(outpath, basename.split(".")[0] + ".txyc")
        bofFile = os.path.join(outpath, basename.split(".")[0] + ".bof")
        script.write(mfcc2txyc(mfcc, kmeansCenters, clusterCount, outpath, TopK) + ";\n")
        script.write(txyc2bof(txycFile, clusterCount, outpath, TopK) + ";\n")
        count += 1

    # Complete the last script
    script.close()
    qsub(name)
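# The snippet above writes batches of shell commands into run_N.sh scripts and
# hands each finished script to qsub(), which is not shown here. A minimal
# sketch of what such a submission helper might look like (hypothetical;
# assumes a standard `qsub` binary is available on PATH):
import subprocess

def qsub(script_path):
    """Submit a generated batch script to the cluster scheduler (hypothetical sketch)."""
    subprocess.check_call(["qsub", script_path])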
def main():
    base_dir = 'Apex AD2600 Progressive-scan DVD player'
    output_dir = 'Apex AD2600 Progressive-scan DVD player_output'
    files = helper.get_files(base_dir)

    total_precision = 0
    total_recall = 0
    total_f1_score = 0
    total_accuracy = 0
    count = 0

    for f in files:
        if 'no_label_' not in f:
            continue

        true = f.replace('no_label_', '')
        empirical = os.path.join(output_dir, os.path.basename(f))
        precision, recall, f1, accuracy = score_file(empirical, true)

        if precision is not None:
            count += 1
            total_precision += precision
            total_recall += recall
            total_f1_score += f1
            total_accuracy += accuracy
            print(f, 'Precision: ', precision, 'Recall: ', recall, 'F1', f1, 'accuracy', accuracy)

    pre = total_precision / count
    rec = total_recall / count
    f1s = total_f1_score / count
    acc = total_accuracy / count

    print('\n\nSummary:\n\n')
    print('Precision: ', pre, 'Recall: ', rec, 'F1:', f1s, 'Accuracy: ', acc)
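# score_file() is project code not shown above. For reference, a minimal sketch
# of how per-file precision, recall, F1, and accuracy could be computed from
# confusion counts (hypothetical helper, not the project's implementation):
def compute_metrics(tp, fp, fn, tn):
    """Standard precision/recall/F1/accuracy from confusion counts; returns Nones on empty denominators."""
    if tp + fp == 0 or tp + fn == 0:
        return None, None, None, None
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    accuracy = (tp + tn) / (tp + fp + fn + tn)
    return precision, recall, f1, accuracy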
else:
    mode = 'file'

assert os.path.isdir(quartus_path)

# Thanks http://web.engr.oregonstate.edu/~sllu/tools/vhdl.html
vlib = quartus_path + '\\modelsim_ase\\win32aloem\\vlib.exe'
assert os.path.isfile(vlib)
vcom = quartus_path + '\\modelsim_ase\\win32aloem\\vcom.exe'
assert os.path.isfile(vcom)

quartus_project_dir = root + '\\quartus\\'

cmd = [vlib, 'work']
if run_cmd(quartus_project_dir, cmd) != 0:
    print('vlib failed')
    sys.exit(1)

if mode == 'file':
    cmd = [vcom, '-93', root + vhdl_src]
    if run_cmd(quartus_project_dir, cmd) != 0:
        print('vcom failed')
        sys.exit(1)

if mode == 'dir':
    for file in get_files(root + vhdl_src, '.vhd'):
        cmd = [vcom, '-93', '-check_synthesis', file]
        if run_cmd(quartus_project_dir, cmd) != 0:
            print('vcom failed')
            sys.exit(1)
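# The hard-coded '\\' separators above tie the script to Windows. An equivalent,
# more portable way to build the same paths would be os.path.join (sketch only,
# assuming the same quartus_path / root layout as above):
import os

vlib = os.path.join(quartus_path, 'modelsim_ase', 'win32aloem', 'vlib.exe')
vcom = os.path.join(quartus_path, 'modelsim_ase', 'win32aloem', 'vcom.exe')
quartus_project_dir = os.path.join(root, 'quartus')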
def is_valid_entry(points) -> bool:
    if np.isnan(points) or points == 0:
        return False
    return True


def get_season(filename: str) -> str:
    return "_".join(filename.split("_")[1:])


def get_team(filename: str) -> str:
    return filename.split("_")[0]


raw_files = get_files("raw_data")
columns = ["player", "team", "season", "date", "points"]

for file in list(raw_files):
    df = pd.read_csv(file)
    clean_df = pd.DataFrame(columns=columns)
    date_cols = df.columns[1:]

    for index, row in df.iterrows():
        for date_col in date_cols:
            if is_valid_entry(row[date_col]):
                clean_date = convert_date(date_col, int(file.stem[-4:]))
                clean_df = clean_df.append(
                    {
                        "player": row["player"],
                        "team": get_team(file.stem),
                        # Remaining fields reconstructed to match the `columns` list above
                        "season": get_season(file.stem),
                        "date": clean_date,
                        "points": row[date_col],
                    },
                    ignore_index=True,
                )
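# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. An
# equivalent pattern that works on current pandas is to collect the cleaned
# rows in a plain list and build the frame once per file (sketch under the
# same assumptions as the snippet above):
rows = []
for index, row in df.iterrows():
    for date_col in date_cols:
        if is_valid_entry(row[date_col]):
            rows.append({
                "player": row["player"],
                "team": get_team(file.stem),
                "season": get_season(file.stem),
                "date": convert_date(date_col, int(file.stem[-4:])),
                "points": row[date_col],
            })
clean_df = pd.DataFrame(rows, columns=columns)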
def process_dir(data_dir, MIN_N_GRAM, MAX_N_GRAM, b_verbose=False, b_size=None):
    """
    Processes a directory containing a set of case documents and generates
    n-grams. The n-grams thus generated shall be stored in {data_dir}/n_grams/.
    """
    target_dir = os.path.join(data_dir, 'n_grams')

    # Make sure the target directory exists
    helper.ensure_dir(target_dir)

    # Get the case file list
    case_files = helper.get_files(data_dir)
    if b_size is not None:
        case_files = case_files[:b_size]

    total_count = len(case_files)
    progress = 0

    for case_file in case_files:
        # Compute the path to save the file
        target_file_name = os.path.basename(case_file)
        target_path = os.path.join(target_dir, target_file_name)

        # Read the case data from the file
        case_data = helper.read_file_to_string(case_file)

        valid_n_grams = {}

        # Go over every sentence in the document
        for sentence in get_sentences(case_data):
            pos_tuples = nltk.pos_tag(nltk.word_tokenize(sentence))

            # Update the grammar if required and get the POS tags
            pos_tags = get_pos_tags(pos_tuples)

            # Generate N-Grams of tags
            n_grams = []
            for n in range(MIN_N_GRAM, MAX_N_GRAM + 1):
                n_grams.extend([list(grams) for grams in ngrams(range(len(pos_tuples)), n)])

            # Get only the n-grams that match the defined grammar
            for i in range(len(n_grams)):
                # Generate n-gram list and check validity
                if parse([pos_tags[j] for j in n_grams[i]]):
                    # Append words to overall list
                    elements = ' '.join([pos_tuples[k][0] for k in n_grams[i]])
                    if elements in valid_n_grams:
                        valid_n_grams[elements] += 1
                    else:
                        valid_n_grams[elements] = 1

        # Save n-grams to file
        helper.save_dict_to_file(target_path, valid_n_grams)

        progress += 1
        if b_verbose:
            print(progress / (0.01 * total_count), ' % Complete')

    return target_dir
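# get_sentences() and parse() are project helpers not shown here. A minimal
# sketch of what get_sentences might look like using NLTK's sentence tokenizer
# (hypothetical; assumes the 'punkt' tokenizer data has been downloaded):
import nltk

def get_sentences(text):
    """Split a document into sentences using NLTK's Punkt tokenizer (hypothetical sketch)."""
    return nltk.sent_tokenize(text)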