Example #1
    def testMethodsCalled(self):
        """Test that the header, data, and footer methods are called."""
        m = MockFormatter()
        utils.write_report(tempfile.mkstemp()[1], '', lambda fs: m)
        self.assertEqual(m.headered, 1)
        self.assertEqual(m.dataed, 1)
        self.assertEqual(m.footered, 1)
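The test above only asserts that the formatter's header, data, and footer hooks each fire exactly once through utils.write_report. For orientation, a minimal sketch that would satisfy that contract is shown below; the hook names (format_report_header, format_data, format_report_footer) and the body of write_report are assumptions for illustration, not the project's actual utils implementation.

# Hypothetical sketch: assumes write_report(path, data, formatter_factory)
# opens the output file, builds a formatter around it, and calls three hooks.
import tempfile


def write_report(filename, data, formatter_factory):
    with open(filename, 'w') as fs:
        formatter = formatter_factory(fs)
        formatter.format_report_header()   # assumed hook name
        formatter.format_data(data)        # assumed hook name
        formatter.format_report_footer()   # assumed hook name


class MockFormatter(object):
    """Counts each hook call via the attributes the test inspects."""
    def __init__(self):
        self.headered = self.dataed = self.footered = 0

    def format_report_header(self):
        self.headered += 1

    def format_data(self, data):
        self.dataed += 1

    def format_report_footer(self):
        self.footered += 1


# Usage mirroring the test above:
m = MockFormatter()
write_report(tempfile.mkstemp()[1], '', lambda fs: m)
assert (m.headered, m.dataed, m.footered) == (1, 1, 1)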
Example #2
    def generate_couplingrank_report(self, depgrp):
        """Generates a PageRank report for all code in self.filenames to
        self.couplingrank_filename.
        """
        def factory(f):
            return depgraph.RankGoogleChartFormatter(f, self.rootdir)

        p = self.couplingrank_filename
        utils.write_report(p, depgrp, factory)
        self._filesforjump[p] = p, 'Report: Coupling PageRank'
Example #3
    def generate_couplingrank_report(self, depgrp):
        """Generates a PageRank report for all code in self.filenames to
        self.couplingrank_filename.
        """
        def factory(f):
            return depgraph.RankGoogleChartFormatter(f, self.rootdir)

        p = self.couplingrank_filename
        utils.write_report(p, depgrp, factory)
        self._filesforjump[p] = p, 'Report: Coupling PageRank'
Example #4
    def generate_coupling_report(self, depgrp):
        """Generates a report for Afferent and Efferent Coupling between
        all modules in self.filenames,
        saved to self.coupling_filename
        """
        def factory(f):
            return depgraph.CouplingGoogleChartFormatter(f, self.rootdir)

        p = self.coupling_filename
        utils.write_report(p, depgrp, factory)
        self._filesforjump[p] = p, 'Report: Coupling'
Example #5
    def generate_sloc(self):
        """Generates a Source Lines of Code report for all files in self.files,
        output to self.sloc_filename.
        """
        slocgrp = sloc.SlocGroup(self.filenames)

        def makeSlocFmt(f):
            return sloc.SlocGoogleChartFormatter(f, self.rootdir)

        p = self.sloc_filename
        utils.write_report(p, slocgrp, makeSlocFmt)
        self._filesforjump[p] = p, 'Report: SLOC'
Example #6
    def generate_coupling_report(self, depgrp):
        """Generates a report for Afferent and Efferent Coupling between
        all modules in self.filenames,
        saved to self.coupling_filename
        """
        def factory(f):
            return depgraph.CouplingGoogleChartFormatter(f, self.rootdir)

        p = self.coupling_filename
        utils.write_report(p, depgrp, factory)
        self._filesforjump[p] = p, 'Report: Coupling'
Example #7
    def generate_cyclomatic_complexity(self):
        """Generates a cyclomatic complexity report for all files in self.files,
        output to self.cyclcompl_filename.
        """
        ccdata, failures = cyclcompl.measure_cyclcompl(self.filenames)

        def makeFormatter(f):
            return cyclcompl.CCGoogleChartFormatter(
                f, leading_path=self.rootdir)

        p = self.cyclcompl_filename
        utils.write_report(p, (ccdata, failures), makeFormatter)
        self._filesforjump[p] = p, 'Report: Cyclomatic Complexity'
Example #8
    def generate_sloc(self):
        """Generates a Source Lines of Code report for all files in self.files,
        output to self.sloc_filename.
        """
        slocgrp = sloc.SlocGroup(self.filenames)

        def makeSlocFmt(f):
            return sloc.SlocGoogleChartFormatter(f, self.rootdir)

        p = self.sloc_filename
        utils.write_report(p, slocgrp, makeSlocFmt)
        self._filesforjump[p] = p, 'Report: SLOC'
Example #9
def cli(corpora_path, models_path, model_name, debug, verbose, iterations,
        l1, l2, hparams):
    """Command Line Interface para glosador automático del otomí (hñahñu)
    """
    if debug:
        breakpoint()
    params_set = json.loads(hparams.read())
    for params in params_set:
        hyper = param_setter(params, model_name, iterations, l1, l2)
        if hyper['dataset'] == "lezgi":
            corpus = XMLtoWords('FLExTxtExport2.xml')
            corpus = WordsToLetter(corpus)
            dataset = np.array(corpus)
        else:  # Otomi corpus
            base = 'corpus_otomi_'
            corpus = get_corpus(base + 'mod', corpora_path)
            hard_corpus = get_corpus(base + 'hard', corpora_path)
            corpus = WordsToLetter(corpus)
            hard_corpus = WordsToLetter(hard_corpus)
            dataset = np.array(corpus + hard_corpus, dtype=object)
        i = 0
        partial_time = 0
        accuracy_set = []
        kf = KFold(n_splits=hyper['k-folds'], shuffle=True)
        print("*"*10)
        print("K FOLDS VALIDATION")
        print("*"*10)
        for train_index, test_index in kf.split(dataset):
            i += 1
            print("\tK-Fold #", i)
            train_data, test_data = dataset[train_index], dataset[test_index]
            train_time, new_model_name = model_trainer(train_data, models_path,
                                                       hyper, verbose, i)
            y_test, y_pred, tagger = model_tester(test_data, models_path,
                                                  hyper, new_model_name,
                                                  verbose)
            accuracy_set.append(accuracy_score(y_test, y_pred))
            partial_time += train_time
            if verbose:
                print("*"*10)
                print("Partial Time>>", train_time, "Accuracy parcial>>",
                      accuracy_set[i - 1])
                eval_labeled_positions(y_test, y_pred)
                print(bio_classification_report(y_test, y_pred))
        print("Accuracy Set -->", accuracy_set)
        accuracy = sum(accuracy_set) / len(accuracy_set)
        train_time_format = str(round(partial_time / 60, 2)) + "[m]"
        train_size = len(train_data)
        test_size = len(test_data)
        print("Time>>", train_time_format, "Accuracy>>", accuracy)
        write_report(new_model_name, train_size, test_size, accuracy,
                     train_time_format, hyper)
Example #10
    def generate_cyclomatic_complexity(self):
        """Generates a cyclomatic complexity report for all files in self.files,
        output to self.cyclcompl_filename.
        """
        ccdata, failures = cyclcompl.measure_cyclcompl(self.filenames)

        def makeFormatter(f):
            return cyclcompl.CCGoogleChartFormatter(f,
                                                    leading_path=self.rootdir)

        p = self.cyclcompl_filename
        utils.write_report(p, (ccdata, failures), makeFormatter)
        self._filesforjump[p] = p, 'Report: Cyclomatic Complexity'
Example #11
def main():
    global ME_folders, first_fsf, FE_fsf, one_col, ME_csv, FE_csv, preproc_csv, first_csv, FE_dir, ME_dir, out_lines
    template_path="template2.xls"
    height_of_all_lines=0

    #Parse options
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--featpath')
    parser.add_argument('-o', '--out')
    parser.add_argument('-a', '--analysis')
    parser.add_argument('-c', '--config')
    parser.add_argument('-m','--manual_search')
    parser.add_argument('-s','--simple_output')
    args=parser.parse_args()
    config_file_path=args.config
    feat_folder_path=args.featpath
    simple_output=args.simple_output
    analysis=args.analysis

    if args.manual_search:
        search_down_method=0
    else:
        search_down_method=1
    if args.out is None:
        if analysis:
            out_path=analysis
        else:
            out_path=''
            die("Need to set output prefix with -o arg when using path to fsf (-p) arg")
    else:
        out_path=args.out

    #Check config file before
    if config_file_path is None:
        config_file_path="example.cfg"
    if feat_folder_path and analysis:
        die("Please use either -p <path to single feat folder> or\n"+
            "or -a <analysis name>. Not both.")

    configuration=Configuration(config_file_path)

    #find the location of the feat folders within the directories from the config file
    if analysis:
        #Search down switch
        if search_down_method:
            #find ME directories that match analysis pattern
            ME_list=os.listdir(os.path.join(configuration.ME_dir))
            ME_folders=list()
            for folder in ME_list:
                analysis_match=re.search(analysis+configuration.me_pattern, folder)
                if analysis_match:
                    combined=os.path.join(configuration.ME_dir,folder)
                    if os.path.isdir(combined):
                        ME_folders.append(combined)

            #Catch bad analysis name at ME Level
            if not ME_folders:
                die("No analysis folders found at ME level. Do you have the right analysis name?")

            #load any ME directory
            ME_fsf=FsfFile(os.path.join(ME_folders[0],'design.fsf'))
            ME_inputs=ME_fsf.inputs
            fe_fsf_path=''
            me_input_count=1
            while not fe_fsf_path:
                try:
                    fe_fsf_path=ME_inputs[str(me_input_count)].strip('\"')+'/design.fsf'
                except KeyError:
                    break
                if not os.path.isfile(fe_fsf_path):
                    me_input_count+=1
                    fe_fsf_path=''
            FE_fsf=FsfFile(fe_fsf_path)
            FE_inputs=FE_fsf.inputs
            other_FES=list()
            input_fsf_path=get_input_fsf(FE_inputs)
            input_fsf=FsfFile(input_fsf_path)
            while input_fsf.type == input_fsf.FE_TYPE:
                other_FES.append(input_fsf)
                input_fsf_path=get_input_fsf(input_fsf.inputs)
                input_fsf=FsfFile(input_fsf_path)
            first_fsf=input_fsf
            one_col=list()
            if first_fsf.type == first_fsf.FIRST_TYPE:
                one_col.extend(fsf_to_one_column(first_fsf))
                one_col.append(",\n")
                first_csv=fsf_to_csv(first_fsf)
                if hasattr(first_fsf,'preproc'):
                    preprocdir=os.path.join(configuration.first_level_dir,first_fsf.preproc)
                    preproc_fsf=FsfFile((os.path.join(preprocdir,'design.fsf')))
                    preproc_csv=fsf_to_csv(preproc_fsf)
                    one_col.extend(fsf_to_one_column(preproc_fsf))
                    one_col.append(",\n")
                else:
                    preproc_csv=None
            else:
                first_csv=None
                preproc_csv=None
                print "First level not loaded or design file is corrupt. Not adding to output"

            if FE_fsf.type == FE_fsf.FE_TYPE:
                one_col.append(",\n")
                FE_csv=fsf_to_csv(FE_fsf)
                one_col.extend(fsf_to_one_column(FE_fsf))
                one_col.append(",\n")
            else:
                FE_csv=None
                print "No fixed effects loaded, data will not be included in output"

            if ME_fsf:
                ME_csv=fsf_to_csv(ME_fsf)
                one_col.extend(fsf_to_one_column(ME_fsf))
                one_col.append(",\n")
            else:
                ME_csv=None
                print "No Mixed effects loaded, data will not be included in output"

            #out_lines=combine_for_csv(first_csv,height_of_all_lines,preproc_csv,FE_csv,ME_csv)
            out_lines=list()
            if first_csv:
                if preproc_csv:
                    out_lines=combine_left_right(preproc_csv[0],first_csv[0])
                else:
                    out_lines=first_csv[0]
            if FE_csv:
                if other_FES:
                    size_of_others=len(other_FES)-1
                    print(size_of_others)
                    while size_of_others >= 0:

                        print(size_of_others)
                        temp_csv=fsf_to_csv(other_FES[size_of_others])
                        out_lines=combine_left_right(out_lines,temp_csv[0])
                        size_of_others -= 1

                out_lines=combine_left_right(out_lines,FE_csv[0])
            if ME_csv:
                out_lines=combine_left_right(out_lines,ME_csv[0])
        else:
            #Old method of searching
            ME_list=os.listdir(os.path.join(configuration.ME_dir))
            ME_folders=list()
            for folder in ME_list:
                analysis_match=re.search(analysis+"_cope", folder)
                if analysis_match:
                    combined=os.path.join(configuration.ME_dir,folder)
                    if os.path.isdir(combined):
                        ME_folders.append(combined)

            first_list=os.listdir(os.path.join(configuration.first_level_dir))
            first_folder=''
            for folder in first_list:
                analysis_match=re.search(analysis+'.feat', folder)
                if analysis_match:
                    combined=os.path.join(configuration.first_level_dir,folder)
                    if os.path.isdir(combined):
                        first_folder=combined
                        break

            FE_list=os.listdir(os.path.join(configuration.FE_dir))
            FE_folder=''
            for folder in FE_list:
                analysis_match=re.search(analysis+'.gfeat', folder)
                if analysis_match:
                    combined=os.path.join(configuration.FE_dir,folder)
                    if os.path.isdir(combined):
                        FE_folder=combined
                        break

            one_col=list()

            #load fsf files using FsfFile class
            first_fsf=FsfFile(os.path.join(first_folder,'design.fsf'))
            if first_fsf.type == first_fsf.FIRST_TYPE:
                one_col.extend(fsf_to_one_column(first_fsf))
                one_col.append(",\n")
                first_csv=fsf_to_csv(first_fsf)
                if first_csv[2] > height_of_all_lines:
                    height_of_all_lines=first_csv[2]
                if hasattr(first_fsf,'preproc'):
                    preprocdir=os.path.join(configuration.first_level_dir,first_fsf.preproc)
                    preproc_fsf=FsfFile((os.path.join(preprocdir,'design.fsf')))
                    preproc_csv=fsf_to_csv(preproc_fsf)
                    one_col.extend(fsf_to_one_column(preproc_fsf))
                    one_col.append(",\n")
            else:
                print "First level not loaded or design file is corrupt. Not adding to output"


            FE_fsf=FsfFile(os.path.join(FE_folder,'design.fsf'))
            if FE_fsf.type == FE_fsf.FE_TYPE:
                one_col.append(",\n")
                FE_csv=fsf_to_csv(FE_fsf)
                if FE_csv[2] > height_of_all_lines:
                    height_of_all_lines=FE_csv[2]
                one_col.extend(fsf_to_one_column(FE_fsf))
                one_col.append(",\n")
            else:
                FE_csv=None
                print "No fixed effects loaded, data will not be included in output"

            ME_fsf=FsfFile(os.path.join(ME_folders[0],'design.fsf'))
            if ME_fsf:
                ME_csv=fsf_to_csv(ME_fsf)
                if ME_csv[2] > height_of_all_lines:
                    height_of_all_lines=ME_csv[2]
                one_col.extend(fsf_to_one_column(ME_fsf))
                one_col.append(",\n")
            else:
                print "No Mixed effects loaded, data will not be included in output"

            out_lines=combine_for_csv(first_csv,height_of_all_lines,preproc_csv,FE_csv,ME_csv)


        new_one=list()
        for row in one_col:
            new_one.append(row+'\n')
        write_report(out_lines,out_path+".csv")
        write_report(new_one,out_path+"_one.csv")
        excel_output_path=out_path+'.xls'
        #prep fe names
        if hasattr(FE_fsf, 'cons'):
            fe_cope_names=dict()
            for item in FE_fsf.cons.items():
                key,contrast=item
                fe_cope_names[key]=contrast.name
        else:
            #TODO FE hack for the screwed up stroops. Remove after done.
            FE_fsf=FsfFile(os.path.join("/Volumes/storage/TAF_fanal/PV/FE2/x301fe_a5t.gfeat/design.fsf"))
            fe_cope_names=dict()
            for item in FE_fsf.cons.items():
                key,contrast=item
                fe_cope_names[key]=contrast.name

        first_cope_names=dict()
        for item in first_fsf.cons.items():
            key,contrast=item
            first_cope_names[key]=contrast.name

        if simple_output is None:
            excel=ExcelResults(fe_cope_names,first_cope_names, ME_folders, excel_output_path,configuration)
            excel.main()
    elif feat_folder_path :
        fsf_single=FsfFile(os.path.join(feat_folder_path))
        fsf_csv=combine_for_csv(fsf_to_csv(fsf_single))
        one_lines=list()
        for row in fsf_to_one_column(fsf_single):
            one_lines.append(row+'\n')
        write_report(fsf_csv,out_path+'.csv')
        write_report(one_lines,out_path+'_one.csv')
Example #12
arg_parser.add_argument("--extract",
                        action="store_true",
                        help="Run the extractor.")
arg_parser.add_argument("--scrape",
                        action="store_true",
                        help="Run the scraper.")
arg_parser.add_argument("--headless",
                        action="store_true",
                        help="No window for selenium scraper.")
args = arg_parser.parse_args()

if __name__ == "__main__":
    if not path.exists(FILES_DIR_PATH):
        os.mkdir(FILES_DIR_PATH)

    if args.scrape:
        urls = read_input_from_csv(args.csv_file)
        with DetailPageScraper(args.headless) as scraper:
            for url in tqdm(urls):
                scraper.make_request(url)

    if args.extract:
        parser = DetailPageParser()
        files = get_files_to_parse()
        data = []
        for file_name in tqdm(files):
            with open(file_name, "r") as file:
                data.append(parser.extract(file.read()))

        write_report(data)
Example #13
def main():

    parser = argparse.ArgumentParser(
        description='Process input information for web scraping.')
    parser.add_argument('action',
                        type=str,
                        choices=['scrape', 'report'],
                        help='define the scraping action')
    parser.add_argument('-c',
                        default='criteria.json',
                        help='criteria file to scrape against')
    parser.add_argument(
        '-o',
        default='report.json',
        help='report file generated against specified criteria')
    parser.add_argument('--url',
                        default=settings.ROOT_URL,
                        help='url to scrape information from')

    args = parser.parse_args()

    criteria_data = utils.get_criteria(args.c)

    if args.action == 'scrape':

        criteria_cut_off_date = dt_parser.parse(criteria_data['cut_off_date'])

        response = requests.get(args.url)

        parsed_html = BeautifulSoup(response.text, 'html.parser')
        articles = parsed_html.body.find_all('div', attrs={'class': 'post'})

        db_utils.clean_db(settings.DATABASE)

        for article in articles:
            article_publish_date_str = article.find(
                'p', attrs={
                    'class': 'fusion-single-line-meta'
                }).contents[4].text
            article_publish_date = dt_parser.parse(article_publish_date_str)

            if article_publish_date < criteria_cut_off_date:
                continue
            else:
                article_data = utils.fetch_articles_from_site(article)
                article_data['date'] = article_publish_date

                db_utils.write_articles_to_db(settings.DATABASE, article_data)

    if args.action == 'report':

        fetched_db_data = db_utils.fetch_articles_from_db(
            settings.DATABASE, criteria_data)

        serialized_data = {
            'criteria':
            criteria_data,
            'common_words':
            list(itertools.chain(*(row[1] for row in fetched_db_data))),
            'articles': [
                utils.serialize_article_from_db_row(row[0])
                for row in fetched_db_data
            ]
        }

        utils.write_report(serialized_data, args.o)
Example #14
def main(args):
    """ Main function for classification with imputed dataset
    
    Args:
        - from_id: start index to file list
        - to_id: end index to file list
        - fold_size: fold_size start from index 1 

    Returns:
        -     
    """
    # Input parameters
    from_id = args.from_id
    to_id = args.to_id
    fold_size = args.fold_size

    # Initial parameters
    binary_classifiers = [1, 1, 1, 1]  # 1: Activate or 0: Deactivate
    classification_flag = [
        i for i, clsf in enumerate(binary_classifiers) if clsf == 1
    ]
    missingness_flag = [0, 10, 20, 30, 40, 50]  # t% missing data

    # Loading data
    for i_file in range(from_id, to_id):
        file_name = file_list[i_file]
        print(datetime.datetime.now(), "File {}: {}".format(i_file, file_name))
        file_data_path = os.path.join(imputed_dataset, file_name)
        result_data_path = os.path.join(result_path, file_name)
        for name_imputation in os.listdir(file_data_path):
            for missing in missingness_flag:
                for clf_flag in classification_flag:
                    dict_eval = {
                        'accuracy': [],
                        'p_macro': [],
                        'r_macro': [],
                        'f1_macro': [],
                        'p_micro': [],
                        'r_micro': [],
                        'f1_micro': []
                    }
                    for i in range(1, fold_size):
                        D_train, D_test = csv_reader(file_data_path,
                                                     name_imputation,
                                                     i,
                                                     method='data_missing',
                                                     missingness=missing)

                        features_D_train = D_train[:, :-1]
                        labels_D_train = D_train[:, -1].astype(np.int32)
                        features_D_test = D_test[:, :-1]
                        labels_D_test = D_test[:, -1].astype(np.int32)

                        classes = np.unique(labels_D_test)
                        n_classes = len(classes)

                        labels_predicted, name_classification_algo = model_prediction(
                            features_D_train, features_D_test, labels_D_train,
                            clf_flag, n_classes)
                        accuracy, p_macro, r_macro, f1_macro, p_micro, r_micro, f1_micro = evaluation_report(
                            labels_predicted, labels_D_test)
                        dict_eval['accuracy'].append(accuracy)
                        dict_eval['p_macro'].append(p_macro)
                        dict_eval['r_macro'].append(r_macro)
                        dict_eval['f1_macro'].append(f1_macro)
                        dict_eval['p_micro'].append(p_micro)
                        dict_eval['r_micro'].append(r_micro)
                        dict_eval['f1_micro'].append(f1_micro)

                    write_report(dict_eval, result_data_path, name_imputation,
                                 missing, name_classification_algo)