def testMethodsCalled(self):
    """Verify write_report invokes the formatter's header, data, and footer once each."""
    formatter = MockFormatter()
    report_path = tempfile.mkstemp()[1]
    # The factory ignores the stream and always hands back our mock.
    utils.write_report(report_path, '', lambda fs: formatter)
    for call_count in (formatter.headered, formatter.dataed, formatter.footered):
        self.assertEqual(call_count, 1)
def generate_couplingrank_report(self, depgrp):
    """Write a PageRank coupling report for all code in self.filenames.

    The report is saved to self.couplingrank_filename and its path is
    registered in self._filesforjump for navigation.
    """
    path = self.couplingrank_filename
    utils.write_report(
        path, depgrp,
        lambda f: depgraph.RankGoogleChartFormatter(f, self.rootdir))
    self._filesforjump[path] = path, 'Report: Coupling PageRank'
def generate_coupling_report(self, depgrp):
    """Write an Afferent/Efferent Coupling report for all modules in self.filenames.

    The report is saved to self.coupling_filename and its path is
    registered in self._filesforjump for navigation.
    """
    path = self.coupling_filename
    utils.write_report(
        path, depgrp,
        lambda f: depgraph.CouplingGoogleChartFormatter(f, self.rootdir))
    self._filesforjump[path] = path, 'Report: Coupling'
def generate_sloc(self):
    """Write a Source Lines of Code report for all files in self.filenames.

    The report is saved to self.sloc_filename and its path is registered
    in self._filesforjump for navigation.
    """
    sloc_group = sloc.SlocGroup(self.filenames)
    path = self.sloc_filename
    utils.write_report(
        path, sloc_group,
        lambda f: sloc.SlocGoogleChartFormatter(f, self.rootdir))
    self._filesforjump[path] = path, 'Report: SLOC'
def generate_cyclomatic_complexity(self):
    """Write a cyclomatic complexity report for all files in self.filenames.

    The report is saved to self.cyclcompl_filename and its path is
    registered in self._filesforjump for navigation.
    """
    # measure_cyclcompl yields (ccdata, failures); the report takes both.
    measured = cyclcompl.measure_cyclcompl(self.filenames)

    def make_formatter(out):
        return cyclcompl.CCGoogleChartFormatter(out, leading_path=self.rootdir)

    path = self.cyclcompl_filename
    utils.write_report(path, measured, make_formatter)
    self._filesforjump[path] = path, 'Report: Cyclomatic Complexity'
def cli(corpora_path, models_path, model_name, debug, verbose, iterations, l1, l2, hparams):
    """Command Line Interface for the automatic Otomi (hñahñu) glosser.

    For each hyper-parameter set read from *hparams* (a JSON file object),
    builds the requested dataset, runs k-fold cross validation training a
    CRF-style tagger per fold, and writes a summary report per set.

    Args:
        corpora_path: directory holding the Otomi corpus files.
        models_path: directory where trained models are stored.
        model_name: base name for the trained models.
        debug: if truthy, drop into the debugger before doing anything.
        verbose: if truthy, print per-fold metrics and reports.
        iterations, l1, l2: training hyper-parameters forwarded to param_setter.
        hparams: open file-like object containing a JSON list of param dicts.
    """
    if debug:
        breakpoint()
    # One experiment per parameter dict in the JSON file.
    params_set = json.loads(hparams.read())
    for params in params_set:
        hyper = param_setter(params, model_name, iterations, l1, l2)
        if hyper['dataset'] == "lezgi":
            # Lezgi corpus comes from a FLEx XML export.
            corpus = XMLtoWords('FLExTxtExport2.xml')
            corpus = WordsToLetter(corpus)
            dataset = np.array(corpus)
        else:
            # Otomi corpus: concatenate the "mod" and "hard" variants.
            base = 'corpus_otomi_'
            corpus = get_corpus(base + 'mod', corpora_path)
            hard_corpus = get_corpus(base + 'hard', corpora_path)
            corpus = WordsToLetter(corpus)
            hard_corpus = WordsToLetter(hard_corpus)
            # dtype=object: sentences are variable-length sequences.
            dataset = np.array(corpus + hard_corpus, dtype=object)
        i = 0
        partial_time = 0
        accuracy_set = []
        kf = KFold(n_splits=hyper['k-folds'], shuffle=True)
        print("*"*10)
        print("K FOLDS VALIDATION")
        print("*"*10)
        for train_index, test_index in kf.split(dataset):
            i += 1
            print("\tK-Fold #", i)
            train_data, test_data = dataset[train_index], dataset[test_index]
            # Train on this fold; model_trainer returns elapsed time and the
            # name under which the fold's model was saved.
            train_time, new_model_name = model_trainer(train_data, models_path,
                                                       hyper, verbose, i)
            y_test, y_pred, tagger = model_tester(test_data, models_path, hyper,
                                                  new_model_name, verbose)
            accuracy_set.append(accuracy_score(y_test, y_pred))
            partial_time += train_time
            if verbose:
                print("*"*10)
                print("Partial Time>>", train_time,
                      "Accuracy parcial>>", accuracy_set[i - 1])
                eval_labeled_positions(y_test, y_pred)
                print(bio_classification_report(y_test, y_pred))
        # Summarize across folds. NOTE(review): train_size/test_size reflect
        # only the LAST fold's split — confirm that is intended.
        print("Accuracy Set -->", accuracy_set)
        accuracy = sum(accuracy_set) / len(accuracy_set)
        train_time_format = str(round(partial_time / 60, 2)) + "[m]"
        train_size = len(train_data)
        test_size = len(test_data)
        print("Time>>", train_time_format, "Accuracy>>", accuracy)
        write_report(new_model_name, train_size, test_size, accuracy,
                     train_time_format, hyper)
def generate_cyclomatic_complexity(self):
    """Write a cyclomatic complexity report for all files in self.filenames.

    The report is saved to self.cyclcompl_filename and its path is
    registered in self._filesforjump for navigation.
    """
    ccdata, failures = cyclcompl.measure_cyclcompl(self.filenames)
    path = self.cyclcompl_filename
    utils.write_report(
        path, (ccdata, failures),
        lambda f: cyclcompl.CCGoogleChartFormatter(f, leading_path=self.rootdir))
    self._filesforjump[path] = path, 'Report: Cyclomatic Complexity'
def main():
    """Collect FSL FEAT design information into CSV/one-column/Excel reports.

    Two modes, chosen by CLI args:
      * -a <analysis>: locate the mixed-effects (ME), fixed-effects (FE) and
        first-level FEAT folders for the named analysis (either by walking
        down from the ME design inputs, or by the older pattern search),
        convert each design.fsf to CSV, and write combined reports.
      * -p <featpath>: dump a single feat folder's design to CSV.

    NOTE(review): written for Python 2 (print statements). The reconstruction
    of indentation below is a best effort from a flattened source — verify
    against the original file before relying on exact block boundaries.
    """
    # NOTE(review): FE_dir and ME_dir are listed twice in this global
    # statement — harmless but probably a typo worth cleaning up.
    global ME_folders, first_fsf, FE_fsf, one_col, ME_csv, FE_csv, preproc_csv, first_csv, FE_dir, FE_dir, ME_dir, ME_dir, out_lines
    template_path="template2.xls"  # NOTE(review): appears unused in this function
    height_of_all_lines=0
    #Parse options
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--featpath')
    parser.add_argument('-o', '--out')
    parser.add_argument('-a', '--analysis')
    parser.add_argument('-c', '--config')
    parser.add_argument('-m','--manual_search')
    parser.add_argument('-s','--simple_output')
    args=parser.parse_args()
    config_file_path=args.config
    feat_folder_path=args.featpath
    simple_output=args.simple_output
    analysis=args.analysis
    # -m switches to the legacy "manual" search; default is search-down.
    if args.manual_search:
        search_down_method=0
    else:
        search_down_method=1
    # Output prefix defaults to the analysis name; -p mode requires -o.
    if args.out is None:
        if analysis:
            out_path=analysis
        else:
            out_path=''
            die("Need to set output prefix with -o arg when using path to fsf (-p) arg")
    else:
        out_path=args.out
    #Check config file before
    if config_file_path is None:
        config_file_path="example.cfg"
    if feat_folder_path and analysis:
        die("Please use either -p <path to single feat folder> or\n"+
            "or -a <analysis name>. Not both.")
    configuration=Configuration(config_file_path)
    #find the location of the feat folders within the directories from the config file
    if analysis:
        #Search down switch
        if search_down_method:
            #find ME directories that match analysis pattern
            ME_list=os.listdir(os.path.join(configuration.ME_dir))
            ME_folders=list()
            for folder in ME_list:
                analysis_match=re.search(analysis+configuration.me_pattern, folder)
                if analysis_match:
                    combined=os.path.join(configuration.ME_dir,folder)
                    if os.path.isdir(combined):
                        ME_folders.append(combined)
            #Catch bad analysis name at ME Level
            if not ME_folders:
                die("No analysis folders found at ME level. Do you have the right analysis name?")
            #load any ME directory
            ME_fsf=FsfFile(os.path.join(ME_folders[0],'design.fsf'))
            ME_inputs=ME_fsf.inputs
            fe_fsf_path=''
            me_input_count=1
            # Walk the ME inputs (keyed "1", "2", ...) until one points at an
            # existing FE design.fsf, or the keys run out.
            while not fe_fsf_path:
                try:
                    fe_fsf_path=ME_inputs[str(me_input_count)].strip('\"')+'/design.fsf'
                except KeyError:
                    break
                if not os.path.isfile(fe_fsf_path):
                    me_input_count+=1
                    fe_fsf_path=''
            FE_fsf=FsfFile(fe_fsf_path)
            FE_inputs=FE_fsf.inputs
            other_FES=list()
            # Descend through chained FE levels until a non-FE (first-level)
            # design is reached; remember the intermediate FEs.
            input_fsf_path=get_input_fsf(FE_inputs)
            input_fsf=FsfFile(input_fsf_path)
            while input_fsf.type == input_fsf.FE_TYPE:
                other_FES.append(input_fsf)
                input_fsf_path=get_input_fsf(input_fsf.inputs)
                input_fsf=FsfFile(input_fsf_path)
            first_fsf=input_fsf
            one_col=list()
            # First-level design: convert to CSV and one-column form; also
            # pick up its preprocessing design when present.
            if first_fsf.type == first_fsf.FIRST_TYPE:
                one_col.extend(fsf_to_one_column(first_fsf))
                one_col.append(",\n")
                first_csv=fsf_to_csv(first_fsf)
                if hasattr(first_fsf,'preproc'):
                    preprocdir=os.path.join(configuration.first_level_dir,first_fsf.preproc)
                    preproc_fsf=FsfFile((os.path.join(preprocdir,'design.fsf')))
                    preproc_csv=fsf_to_csv(preproc_fsf)
                    one_col.extend(fsf_to_one_column(preproc_fsf))
                    one_col.append(",\n")
                else:
                    preproc_csv=None
            else:
                first_csv=None
                preproc_csv=None
                print "First level not loaded or design file is corrupt. Not adding to output"
            if FE_fsf.type == FE_fsf.FE_TYPE:
                one_col.append(",\n")
                FE_csv=fsf_to_csv(FE_fsf)
                one_col.extend(fsf_to_one_column(FE_fsf))
                one_col.append(",\n")
            else:
                FE_csv=None
                print "No fixed effects loaded, data will not be included in output"
            if ME_fsf:
                ME_csv=fsf_to_csv(ME_fsf)
                one_col.extend(fsf_to_one_column(ME_fsf))
                one_col.append(",\n")
            else:
                ME_csv=None
                print "No Mixed effects loaded, data will not be included in output"
            #out_lines=combine_for_csv(first_csv,height_of_all_lines,preproc_csv,FE_csv,ME_csv)
            # Stitch the CSV columns left-to-right: preproc | first | other FEs
            # (deepest first) | FE | ME.
            out_lines=list()
            if first_csv:
                if preproc_csv:
                    out_lines=combine_left_right(preproc_csv[0],first_csv[0])
                else:
                    out_lines=first_csv[0]
            if FE_csv:
                if other_FES:
                    size_of_others=len(other_FES)-1
                    print size_of_others
                    while size_of_others >= 0:
                        print size_of_others
                        temp_csv=fsf_to_csv(other_FES[size_of_others])
                        out_lines=combine_left_right(out_lines,temp_csv[0])
                        size_of_others -= 1
                out_lines=combine_left_right(out_lines,FE_csv[0])
            if ME_csv:
                out_lines=combine_left_right(out_lines,ME_csv[0])
        else:
            #Old method of searching
            # Legacy mode: pattern-match folders at each level instead of
            # following design inputs downward.
            ME_list=os.listdir(os.path.join(configuration.ME_dir))
            ME_folders=list()
            for folder in ME_list:
                analysis_match=re.search(analysis+"_cope", folder)
                if analysis_match:
                    combined=os.path.join(configuration.ME_dir,folder)
                    if os.path.isdir(combined):
                        ME_folders.append(combined)
            first_list=os.listdir(os.path.join(configuration.first_level_dir))
            first_folder=''
            for folder in first_list:
                analysis_match=re.search(analysis+'.feat', folder)
                if analysis_match:
                    combined=os.path.join(configuration.first_level_dir,folder)
                    if os.path.isdir(combined):
                        first_folder=combined
                        break
            FE_list=os.listdir(os.path.join(configuration.FE_dir))
            FE_folder=''
            for folder in FE_list:
                analysis_match=re.search(analysis+'.gfeat', folder)
                if analysis_match:
                    combined=os.path.join(configuration.FE_dir,folder)
                    if os.path.isdir(combined):
                        FE_folder=combined
                        break
            one_col=list()
            #load fsf files using FsfFile class
            first_fsf=FsfFile(os.path.join(first_folder,'design.fsf'))
            if first_fsf.type == first_fsf.FIRST_TYPE:
                one_col.extend(fsf_to_one_column(first_fsf))
                one_col.append(",\n")
                first_csv=fsf_to_csv(first_fsf)
                # Track the tallest CSV so combine_for_csv can pad columns.
                if first_csv[2] > height_of_all_lines:
                    height_of_all_lines=first_csv[2]
                if hasattr(first_fsf,'preproc'):
                    preprocdir=os.path.join(configuration.first_level_dir,first_fsf.preproc)
                    preproc_fsf=FsfFile((os.path.join(preprocdir,'design.fsf')))
                    preproc_csv=fsf_to_csv(preproc_fsf)
                    one_col.extend(fsf_to_one_column(preproc_fsf))
                    one_col.append(",\n")
            else:
                print "First level not loaded or design file is corrupt. Not adding to output"
            FE_fsf=FsfFile(os.path.join(FE_folder,'design.fsf'))
            if FE_fsf.type == FE_fsf.FE_TYPE:
                one_col.append(",\n")
                FE_csv=fsf_to_csv(FE_fsf)
                if FE_csv[2] > height_of_all_lines:
                    height_of_all_lines=FE_csv[2]
                one_col.extend(fsf_to_one_column(FE_fsf))
                one_col.append(",\n")
            else:
                FE_csv=None
                print "No fixed effects loaded, data will not be included in output"
            ME_fsf=FsfFile(os.path.join(ME_folders[0],'design.fsf'))
            if ME_fsf:
                ME_csv=fsf_to_csv(ME_fsf)
                if ME_csv[2] > height_of_all_lines:
                    height_of_all_lines=ME_csv[2]
                one_col.extend(fsf_to_one_column(ME_fsf))
                one_col.append(",\n")
            else:
                print "No Mixed effects loaded, data will not be included in output"
            out_lines=combine_for_csv(first_csv,height_of_all_lines,preproc_csv,FE_csv,ME_csv)
        # Write the combined CSV and the newline-terminated one-column dump.
        new_one=list()
        for row in one_col:
            new_one.append(row+'\n')
        write_report(out_lines,out_path+".csv")
        write_report(new_one,out_path+"_one.csv")
        excel_output_path=out_path+'.xls'
        #prep fe names
        # Map contrast key -> contrast name for the Excel report.
        if hasattr(FE_fsf, 'cons'):
            fe_cope_names=dict()
            for item in FE_fsf.cons.items():
                key,contrast=item
                fe_cope_names[key]=contrast.name
        else:
            #TODO FE hack for the screwed up stroops. Remove after done.
            FE_fsf=FsfFile(os.path.join("/Volumes/storage/TAF_fanal/PV/FE2/x301fe_a5t.gfeat/design.fsf"))
            fe_cope_names=dict()
            for item in FE_fsf.cons.items():
                key,contrast=item
                fe_cope_names[key]=contrast.name
        first_cope_names=dict()
        for item in first_fsf.cons.items():
            key,contrast=item
            first_cope_names[key]=contrast.name
        if simple_output is None:
            excel=ExcelResults(fe_cope_names,first_cope_names, ME_folders, excel_output_path,configuration)
            excel.main()
    elif feat_folder_path :
        # Single-feat mode: dump just this design to CSV + one-column files.
        fsf_single=FsfFile(os.path.join(feat_folder_path))
        fsf_csv=combine_for_csv(fsf_to_csv(fsf_single))
        one_lines=list()
        for row in fsf_to_one_column(fsf_single):
            one_lines.append(row+'\n')
        write_report(fsf_csv,out_path+'.csv')
        write_report(one_lines,out_path+'_one.csv')
# Register the boolean CLI flags on the module-level arg_parser.
for flag, help_text in (
        ("--extract", "Run the extractor."),
        ("--scrape", "Run the scraper."),
        ("--headless", "No window for selenium scraper.")):
    arg_parser.add_argument(flag, action="store_true", help=help_text)
args = arg_parser.parse_args()

if __name__ == "__main__":
    # Make sure the working directory for downloaded pages exists.
    if not path.exists(FILES_DIR_PATH):
        os.mkdir(FILES_DIR_PATH)
    if args.scrape:
        # Fetch every URL listed in the input CSV with one browser session.
        urls = read_input_from_csv(args.csv_file)
        with DetailPageScraper(args.headless) as scraper:
            for url in tqdm(urls):
                scraper.make_request(url)
    if args.extract:
        # Parse all previously downloaded pages and emit a single report.
        parser = DetailPageParser()
        data = []
        for file_name in tqdm(get_files_to_parse()):
            with open(file_name, "r") as page_file:
                data.append(parser.extract(page_file.read()))
        write_report(data)
def main():
    """Entry point: scrape articles into the DB or generate a JSON report.

    In 'scrape' mode, downloads the target page, keeps only posts published
    on/after the criteria cut-off date, and stores them in the database
    (after wiping it). In 'report' mode, serializes matching DB rows plus
    the criteria into the report file.
    """
    arg_parser = argparse.ArgumentParser(
        description='Process input information for web scrapping.')
    arg_parser.add_argument('action', type=str, choices=['scrape', 'report'],
                            help='define the scrapping action')
    arg_parser.add_argument('-c', default='criteria.json',
                            help='criteria file to scrapping against')
    arg_parser.add_argument(
        '-o', default='report.json',
        help='report file generated against specified criteria')
    arg_parser.add_argument('--url', default=settings.ROOT_URL,
                            help='url to scrape information from')
    args = arg_parser.parse_args()

    criteria_data = utils.get_criteria(args.c)

    if args.action == 'scrape':
        cut_off_date = dt_parser.parse(criteria_data['cut_off_date'])
        page = BeautifulSoup(requests.get(args.url).text, 'html.parser')
        posts = page.body.find_all('div', attrs={'class': 'post'})
        # Start from a clean database so each scrape reflects the site now.
        db_utils.clean_db(settings.DATABASE)
        for post in posts:
            date_text = post.find(
                'p', attrs={'class': 'fusion-single-line-meta'}).contents[4].text
            published = dt_parser.parse(date_text)
            # Only keep posts at or after the criteria cut-off date.
            if published >= cut_off_date:
                record = utils.fetch_articles_from_site(post)
                record['date'] = published
                db_utils.write_articles_to_db(settings.DATABASE, record)

    if args.action == 'report':
        rows = db_utils.fetch_articles_from_db(settings.DATABASE, criteria_data)
        report = {
            'criteria': criteria_data,
            'common_words': list(itertools.chain(*(row[1] for row in rows))),
            'articles': [
                utils.serialize_article_from_db_row(row[0]) for row in rows
            ],
        }
        utils.write_report(report, args.o)
def main(args):
    """ Main function for classification with imputed dataset

    For each input file in file_list[from_id:to_id], each imputation method
    found on disk, each missingness level, and each activated classifier,
    runs per-fold train/test evaluation and writes one report per
    (imputation, missingness, classifier) combination.

    Args:
    - from_id: start index to file list
    - to_id: end index to file list
    - fold_size: fold_size start from index 1

    Returns:
    -
    """
    # Input parameters
    from_id = args.from_id
    to_id = args.to_id
    fold_size = args.fold_size
    # Initial parameters
    binary_classifiers = [1, 1, 1, 1]  # 1: Activate or 0: Deactivate
    # Indices of the activated classifiers.
    classfication_flag = [
        i for i, clsf in enumerate(binary_classifiers) if clsf == 1
    ]
    missingness_flag = [0, 10, 20, 30, 40, 50]  # t% missing data
    # Loading data
    # NOTE(review): file_list, imputed_dataset and result_path are assumed to
    # be module-level names defined elsewhere in this file.
    for i_file in range(from_id, to_id):
        file_name = file_list[i_file]
        print(datetime.datetime.now(), "File {}: {}".format(i_file, file_name))
        file_data_path = os.path.join(imputed_dataset, file_name)
        result_data_path = os.path.join(result_path, file_name)
        # One subdirectory per imputation method.
        for name_imputation in os.listdir(file_data_path):
            for missing in missingness_flag:
                for clf_flag in classfication_flag:
                    # Per-fold metric accumulators for this combination.
                    dict_eval = {
                        'accuracy': [],
                        'p_macro': [],
                        'r_macro': [],
                        'f1_macro': [],
                        'p_micro': [],
                        'r_micro': [],
                        'f1_micro': []
                    }
                    # NOTE(review): iterates folds 1..fold_size-1; confirm
                    # fold_size is intended to be "last fold index + 1".
                    for i in range(1, fold_size):
                        D_train, D_test = csv_reader(file_data_path,
                                                     name_imputation,
                                                     i,
                                                     method='data_missing',
                                                     missingness=missing)
                        # Last column is the integer class label.
                        features_D_train = D_train[:, :-1]
                        labels_D_train = D_train[:, -1].astype(np.int32)
                        features_D_test = D_test[:, :-1]
                        labels_D_test = D_test[:, -1].astype(np.int32)
                        classes = np.unique(labels_D_test)
                        n_classes = len(classes)
                        labels_predicted, name_classification_algo = model_prediction(
                            features_D_train, features_D_test, labels_D_train,
                            clf_flag, n_classes)
                        accuracy, p_macro, r_macro, f1_macro, p_micro, r_micro, f1_micro = evaluation_report(
                            labels_predicted, labels_D_test)
                        dict_eval['accuracy'].append(accuracy)
                        dict_eval['p_macro'].append(p_macro)
                        dict_eval['r_macro'].append(r_macro)
                        dict_eval['f1_macro'].append(f1_macro)
                        dict_eval['p_micro'].append(p_micro)
                        dict_eval['r_micro'].append(r_micro)
                        dict_eval['f1_micro'].append(f1_micro)
                    # Persist the per-fold metrics for this combination.
                    write_report(dict_eval, result_data_path, name_imputation,
                                 missing, name_classification_algo)