Example #1
    def __init__(self):
        """
        Initializer that loads all necessary configuration, logger, and HTTP client instances.
        """
        config_parser = ConfigParser()
        self.config = config_parser.return_json(
            path_to_config='TOREPLACE/config/example.json')
Example #2
    def __init__(self,
                 theApkObj,
                 theAvdName,
                 decompressDir,
                 runHeadless,
                 theLogger=Logger()):
        Thread.__init__(self)
        # configParser
        self.configParser = ConfigParser()

        self.apkObj = theApkObj
        self.log = theLogger
        self.curDir = os.path.dirname(__file__)

        self.staticAnalyzer = None
        self.dynamicAnalyzer = None
        self.logcatAnalyzer = None

        self.startTimeStr = None
        self.endTimeStr = None

        self.emulator = None
        self.emulatorPort = 5554
        self.avdName = theAvdName
        self.runHeadless = runHeadless

        self.decompressPath = decompressDir
        self.logcatFile = None

        self.session = None

        self.cancelFlag = False  # Flag for canceling run
Example #3
    def __init__(self, theConfigFilePath, theLogger=Logger()):
        # parse config file
        self.configParser = ConfigParser()
        # self.configParser.parseFile(theConfigFilePath)
        self.configParser.generateDirectories()

        self.log = theLogger

        # keytool path to parse apk's signature
        self.keytoolPath = None

        # sanddroid directories
        self.mainDir = os.path.dirname(__file__)

        self.appList = []  # list of apk files (full paths)
        self.runningApps = []  # list of apk files currently being analyzed

        self.runHeadless = False
        self.emulatorStartPort = 5554

        self.numThreads = 1
        self.maxThreadRuntime = 600

        # control running threads
        self.threadLogFileList = []  # list to store thread log file path
        self.numFinishedApps = 0  # number of analyzed apps
        self.numRunningThreads = 0  # number of running threads
        self.threadList = []  # list of threads, size=numThreads
        self.threadActiveMask = []  # bitmask to determine if thread is active, size=numThreads

        self.avdsheartbeat = (0, 0, 0, 0)  # times each avd was used in one cycle
        self.avdheartbeat = 0
        self.startTime = datetime.datetime.now()
Example #4
    def load_from(config_meta):
        with open(config_meta['path']) as f:
            config = yaml.safe_load(f)
            parser = ConfigParser(config)
            parser._create_directories()

        task = Task.load_from(parser.task)
        dataset = Dataset.load_from(parser.dataset)
        model_config = config['model']
        label_helper = Label.load_from(parser.label)
        user = config['user']

        # Set up logger
        log_level = config_meta['log_level']
        logger = logging.getLogger('label_app')
        logger.setLevel(getattr(logging, log_level))

        ch = logging.StreamHandler(sys.stdout)
        ch.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(ch)

        return LabelApp(task, dataset, label_helper, user, model_config,
                        parser, logger)
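
Examples #4, #10, and #12 follow the same pattern: parse a YAML file into a dict, then hand that dict to ConfigParser. A minimal config the pattern implies might look like the sketch below; only the 'model' and 'user' keys appear in the examples, while 'task', 'dataset', and 'label' are inferred from the parser attributes, and all leaf values are placeholders.

import yaml

config_text = """
task: classification
dataset:
  judgements_file: data/judgements.json
label: binary
model:
  pretrain_file: data/pretrain.json
user: alice
"""

config = yaml.safe_load(config_text)
print(config['model'], config['user'])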
Example #5
def main():
    """
    Main function.
    """
    # parse args
    args = parse_argument()

    # load main config file and set logging
    main_config = ConfigParser(args.config_file)
    set_logging(log_file=main_config.get_str('log_file'))

    # initialize model manager object
    model_manager = ModelManager()

    # parse config
    classifier = main_config.get_str('classifier')
    pre_built_models_dir = os.path.join(main_config.get_str('pre_built_models_dir'), classifier)
    num_classifiers = main_config.get_int('num_classifiers')

    # we need to build the models first if they do not exist
    if not dir_exists(pre_built_models_dir):
        save_models(
            classifier,
            pre_built_models_dir,
            main_config,
            model_manager,
            num_classifiers)

    make_recommendation(
        classifier,
        pre_built_models_dir,
        main_config,
        model_manager,
        num_classifiers)
Example #6
    def __init__(self, config_filepath):
        """
        Class initializer.

        Inputs:
            config_filepath: (str) Configuration filepath.
        """
        self.configparser = ConfigParser(config_filepath)

        # read configuration file
        self.filepath = self.configparser.getstr('filepath')
        self.full_ontology_pkl = self.configparser.getstr('full_ontology_pkl')
        self.candidate_ontology_pkl = self.configparser.getstr(
            'candidate_ontology_pkl')
        self.skeleton_and_entities_pkl = self.configparser.getstr(
            'skeleton_and_entities_pkl')
        self.overwrite_pkl = self.configparser.getbool('overwrite_pickle_flag')
        self.outputFoodOn = self.configparser.getstr('outputFoodOn')

        self.num_seeds = self.configparser.getint('num_seeds')
        self.num_min_extracted_entities = self.configparser.getint(
            'num_min_extracted_entities')

        # generate pairs from csv file
        self.pd_foodon_pairs = self.generate_pairs()
        self.all_classes, self.all_entities = self.get_classes_and_entities()
        self.foodon_graph, self.graph_dict, self.graph_dict_flip = \
            self.generate_graph()
Example #7
    def __init__(self, config_filepath):
        """
        Class initializer.

        Inputs:
            config_filepath: (str) Configuration filepath.
        """
        self.configparser = ConfigParser(config_filepath)
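
Note that ConfigParser here is a project-specific wrapper rather than the standard library module: the examples variously call getstr/getint/getfloat/getbool (Examples #6, #7, #13, #17), get_str/get_str_list/overwrite (Examples #5, #11), and return_json (Examples #1, #8). Below is a minimal sketch of the INI-flavored variant, built on the stdlib configparser module; the single-section layout and all internals are assumptions.

import configparser

class ConfigParser:
    """Sketch of the getstr/getint/getfloat/getbool wrapper assumed above."""

    def __init__(self, filepath, section='DEFAULT'):
        self._parser = configparser.ConfigParser()
        self._parser.read(filepath)
        self._section = section

    def getstr(self, key):
        return self._parser.get(self._section, key)

    def getint(self, key):
        return self._parser.getint(self._section, key)

    def getfloat(self, key):
        return self._parser.getfloat(self._section, key)

    def getbool(self, key):
        return self._parser.getboolean(self._section, key)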
Example #8
    def __init__(self, module: str, cli: str):
        self.current_env = os.environ.copy()
        self.json_data = ConfigParser(
            path="/home/vlad/infra/armature/armature/conf/modules.json"
        ).return_json()

        self.module = module
        self.cli = cli
        self.module_data = self.json_data['modules'][module]
Example #9
    def __init__(self, config_file):
        """
        Class initializer.

        Inputs:
            config_file: (str) Configuration filepath.
        """
        self.configparser = ConfigParser(config_file)
        self.epoch_callback = EpochCallback()
        self.model = None
Example #10
    def load_from(config_path):
        with open(config_path) as f:
            config = yaml.safe_load(f)
            parser = ConfigParser(config)
            parser._create_directories()

        task = Task.load_from(parser.task)
        dataset = Dataset.load_from(parser.dataset)
        model_config = config['model']
        label_helper = Label.load_from(parser.label)
        user = config['user']

        return LabelApp(task, dataset, label_helper, user, model_config,
                        parser)
Example #11
def save_models(classifier,
                pre_built_models_dir,
                main_config,
                model_manager,
                num_classifiers):
    log.info('Pre-built model directory specified for %s does not exist.', classifier)
    log.info('Building models again.')

    # create directory
    create_dir(pre_built_models_dir)

    # load config parsers
    preprocess_config = ConfigParser(main_config.get_str('preprocess_config'))
    classifier_config = ConfigParser(main_config.get_str('classifier_config'))
    classifier_config.overwrite('classifier', classifier)

    # perform preprocessing
    X, y = model_manager.preprocess(preprocess_config, section=classifier)

    # select subset of features if requested
    selected_features = main_config.get_str_list('selected_features')
    if selected_features:
        log.info('Selecting subset of features: %s', selected_features)
        X = X[selected_features]

    # train multiple classifiers
    for i in range(num_classifiers):
        log.debug('Processing classifier %d/%d', i+1, num_classifiers)

        cmanager = ClassifierManager(classifier_config)
        clf = CalibratedClassifierCV(cmanager.get_classifier(), method='sigmoid', cv=5)
        clf.fit(X, y)

        save_pkl(
            clf,
            os.path.join(pre_built_models_dir, 'model_{}.pkl'.format(i)))
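
Each saved model wraps the configured classifier in scikit-learn's CalibratedClassifierCV, so predict_proba later returns sigmoid-calibrated probabilities rather than raw scores. A self-contained sketch of the same construction on toy data; the base estimator below is an assumption, since the real one comes from ClassifierManager:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# toy stand-in for the preprocessed X, y above
X, y = make_classification(n_samples=200, random_state=0)

clf = CalibratedClassifierCV(RandomForestClassifier(), method='sigmoid', cv=5)
clf.fit(X, y)
print(clf.predict_proba(X[:3])[:, 1])  # calibrated P(y=1)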
Example #12
def main(config_path, epochs=3):
    # For pretraining, we do everything the same, except we replace the
    # dataset:judgements_file with model:pretrain_file.
    with open(config_path) as f:
        config = yaml.safe_load(f)
        parser = ConfigParser(config)
        parser.dataset['judgements_file'] = parser.model['pretrain_file']

    task = Task.load_from(parser.task)
    dataset = PretrainJSONDataset(parser.dataset)
    model_config = config['model']
    label_helper = Label.load_from(parser.label)
    user = config['user']
    label_app = LabelApp(task, dataset, label_helper, user, model_config,
                         parser)
    label_app.trainer.load_existing()
    label_app.trainer.train_epochs(epochs=epochs)
Example #13
def main():
    """
    Main function.
    """
    # set log, parse args, and read configuration
    args = parse_argument()
    configparser = ConfigParser(args.config_file)
    set_logging(configparser.getstr('logfile'))

    # parse FoodOn
    parse_foodon = ParseFoodOn(configparser.getstr('foodon_parse_config'))
    classes_dict = parse_foodon.get_candidate_classes()
    classes_dict_skeleton, candidate_entities = parse_foodon.get_seeded_skeleton(
        classes_dict)

    # run
    scoring_manager = ScoringManager(classes_dict_skeleton, candidate_entities,
                                     configparser.getstr('scoring_config'))

    scoring_manager.run_iteration()
Example #14
def main():
    """
    Main function.
    """
    # parse args
    args = parse_argument()

    # load main config file and set logging
    main_config = ConfigParser(args.config_file)
    set_logging(log_file=main_config.get_str('log_file'))

    # initialize model manager object
    model_manager = ModelManager()

    # baseline / best classifiers
    baseline_classifier = main_config.get_str('baseline')
    best_classifier = main_config.get_str('classifier')

    # plot PR curve and print confusion matrix
    plot_pr_print_cm(baseline_classifier, best_classifier, main_config,
                     model_manager)
Example #15
def main():
    """
    Main function.
    """
    # set log, parse args, and read configuration
    set_logging()
    args = parse_argument()
    configparser = ConfigParser(args.config_file)

    # load data to train with
    sentence_column = configparser.getstr('sentence_column')

    pd_data = pd.read_csv(configparser.getstr('input_filepath'), sep='\t')

    pd_data.fillna('', inplace=True)
    pd_data = pd_data[pd_data[sentence_column] != '']

    # use specified column as sentences
    sentences = pd_data[sentence_column].tolist()
    sentences = [sentence.split() for sentence in sentences]

    # init word2vec manager
    w2vm = Word2VecManager(args.config_file)

    # load pre-trained vectors if configured, then start training
    if configparser.getbool('pre_train'):
        pretrained = configparser.getstr('pre_trained_vectors')
    else:
        pretrained = None

    w2vm.train(sentences, pretrained=pretrained)

    # save word embeddings and model
    w2vm.save_model(configparser.getstr('model_saveto'))
    w2vm.save_vectors(configparser.getstr('vectors_saveto'))
    w2vm.save_loss(configparser.getstr('loss_saveto'))
Example #16
def plot_pr_print_cm(baseline_classifier, best_classifier, main_config,
                     model_manager):
    classifiers_ys = {}

    for classifier in [baseline_classifier, best_classifier]:
        log.info('Running model for classifier \'%s\'', classifier)

        # load config parsers
        preprocess_config = ConfigParser(
            main_config.get_str('preprocess_config'))
        classifier_config = ConfigParser(
            main_config.get_str('classifier_config'))

        # perform preprocessing
        X, y = model_manager.preprocess(preprocess_config, section=classifier)

        # select subset of features if requested
        selected_features = main_config.get_str_list('selected_features')
        if selected_features:
            log.info('Selecting subset of features: %s', selected_features)
            X = X[selected_features]

        # run classification model
        classifier_config.overwrite('classifier', classifier)

        score_avg, score_std, ys = model_manager.run_model_cv(
            X, y, 'f1', classifier_config)

        classifiers_ys[classifier] = ys

    # confusion matrix
    (y_trues, y_preds, y_probs) = classifiers_ys[best_classifier]

    tn = []
    fp = []
    fn = []
    tp = []

    pred_pos = []
    pred_neg = []
    known_pos = []
    known_neg = []

    f1 = []
    precision = []
    recall = []
    specificity = []
    npv = []
    fdr = []
    accuracy = []
    for fold in range(len(y_trues)):
        cm_result = confusion_matrix(y_trues[fold], y_preds[fold]).ravel()
        tn.append(cm_result[0])
        fp.append(cm_result[1])
        fn.append(cm_result[2])
        tp.append(cm_result[3])

        pred_pos.append(cm_result[3] + cm_result[1])
        pred_neg.append(cm_result[2] + cm_result[0])
        known_pos.append(cm_result[3] + cm_result[2])
        known_neg.append(cm_result[1] + cm_result[0])

        f1.append(f1_score(y_trues[fold], y_preds[fold]))
        precision.append(
            precision_score(y_trues[fold], y_preds[fold], average='binary'))
        recall.append(
            recall_score(y_trues[fold], y_preds[fold], average='binary'))
        specificity.append(cm_result[0] / (cm_result[0] + cm_result[1]))
        npv.append(cm_result[0] / (cm_result[0] + cm_result[2]))
        fdr.append(cm_result[1] / (cm_result[1] + cm_result[3]))
        accuracy.append(accuracy_score(y_trues[fold], y_preds[fold]))

    tn_mean = np.mean(tn)
    fp_mean = np.mean(fp)
    fn_mean = np.mean(fn)
    tp_mean = np.mean(tp)
    pred_pos_mean = np.mean(pred_pos)
    pred_neg_mean = np.mean(pred_neg)
    known_pos_mean = np.mean(known_pos)
    known_neg_mean = np.mean(known_neg)
    f1_mean = np.mean(f1)
    precision_mean = np.mean(precision)
    recall_mean = np.mean(recall)
    specificity_mean = np.mean(specificity)
    npv_mean = np.mean(npv)
    fdr_mean = np.mean(fdr)
    accuracy_mean = np.mean(accuracy)

    tn_std = np.std(tn)
    fp_std = np.std(fp)
    fn_std = np.std(fn)
    tp_std = np.std(tp)
    pred_pos_std = np.std(pred_pos)
    pred_neg_std = np.std(pred_neg)
    known_pos_std = np.std(known_pos)
    known_neg_std = np.std(known_neg)
    f1_std = np.std(f1)
    precision_std = np.std(precision)
    recall_std = np.std(recall)
    specificity_std = np.std(specificity)
    npv_std = np.std(npv)
    fdr_std = np.std(fdr)
    accuracy_std = np.std(accuracy)

    log.info(
        'Confusion matrix (tp, fp, fn, tn): (%.2f±%.2f, %.2f±%.2f, %.2f±%.2f, %.2f±%.2f)',
        tp_mean, tp_std, fp_mean, fp_std, fn_mean, fn_std, tn_mean, tn_std)
    log.info('pred pos: %.2f±%.2f', pred_pos_mean, pred_pos_std)
    log.info('pred neg: %.2f±%.2f', pred_neg_mean, pred_neg_std)
    log.info('known pos: %.2f±%.2f', known_pos_mean, known_pos_std)
    log.info('known neg: %.2f±%.2f', known_neg_mean, known_neg_std)
    log.info('F1: %.2f±%.2f', f1_mean, f1_std)
    log.info('Precision: %.2f±%.2f', precision_mean, precision_std)
    log.info('Recall: %.2f±%.2f', recall_mean, recall_std)
    log.info('Specificity: %.2f±%.2f', specificity_mean, specificity_std)
    log.info('Npv: %.2f±%.2f', npv_mean, npv_std)
    log.info('Fdr: %.2f±%.2f', fdr_mean, fdr_std)
    log.info('Accuracy: %.2f±%.2f', accuracy_mean, accuracy_std)

    # plot PR curve
    fig = plt.figure()

    lines = []
    labels = []
    for classifier, ys in classifiers_ys.items():
        y_trues, y_preds, y_probs = ys

        if classifier == best_classifier:
            num_folds = len(y_trues)
            precision = 0
            recall = 0

            for fold in range(num_folds):
                precision += precision_score(y_trues[fold],
                                             y_preds[fold],
                                             average='binary')
                recall += recall_score(y_trues[fold],
                                       y_preds[fold],
                                       average='binary')

            precision /= num_folds
            recall /= num_folds

            arrowprops = {'arrowstyle': '->'}
            plt.scatter(recall, precision, s=30, marker='x', c='k', zorder=3)
            plt.annotate('Operational point', (recall, precision),
                         (recall - 0.05, precision + 0.05),
                         arrowprops=arrowprops)

        y_probs_1 = tuple(y_prob[1].to_numpy() for y_prob in y_probs)
        line, label = plot_pr(y_trues, y_probs_1, classifier)

        lines.append(line)
        labels.append(label)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Best model ({}) PR curve'.format(best_classifier))
    plt.legend(lines, labels, loc='upper right', prop={'size': 10})

    save_figure(fig, main_config.get_str('pr_curve'))
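
The positional indexing of cm_result in this example relies on scikit-learn's ravel order for binary confusion matrices, which is (tn, fp, fn, tp):

from sklearn.metrics import confusion_matrix

y_true = [0, 1, 1, 0, 1]
y_pred = [1, 1, 0, 0, 1]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(tn, fp, fn, tp)  # 1 1 1 2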
Example #17
File: scoring.py Project: IBPA/LOVE
    def __init__(self, candidate_classes_info, candidate_entities,
                 scoring_config):
        """
        Class initializer.
        """
        # config parser
        if isinstance(scoring_config, str):
            scoring_config = ConfigParser(scoring_config)

        # save arguments
        self.candidate_classes_info = candidate_classes_info
        self.candidate_entities = candidate_entities

        # parse config file
        self.alpha = scoring_config.getfloat('alpha')
        self.num_mapping_per_iteration = scoring_config.getint(
            'num_mapping_per_iteration')
        self.initial_siblings_scores = scoring_config.getstr(
            'initial_siblings_scores')
        self.initial_parents_scores = scoring_config.getstr(
            'initial_parents_scores')
        self.pairs_filepath = scoring_config.getstr('pairs_filepath')
        self.populated_filepath = scoring_config.getstr('populated_filepath')
        self.preprocess_config_filepath = scoring_config.getstr(
            'preprocess_config')
        self.similarity_method = scoring_config.getstr('similarity_method')

        log.debug('alpha: %f', self.alpha)
        log.debug('num_mapping_per_iteration: %d',
                  self.num_mapping_per_iteration)
        log.debug('initial_siblings_scores: %s', self.initial_siblings_scores)
        log.debug('initial_parents_scores: %s', self.initial_parents_scores)
        log.debug('pairs_filepath: %s', self.pairs_filepath)
        log.debug('populated_filepath: %s', self.populated_filepath)
        log.debug('similarity_method: %s', self.similarity_method)

        # preprocess manager
        self.fpm = FdcPreprocessManager(self.preprocess_config_filepath)

        # number of candidate classes & entities
        self.num_candidate_classes = len(self.candidate_classes_info)
        self.num_candidate_entities = len(self.candidate_entities)

        log.debug('Number of candidate classes: %d',
                  self.num_candidate_classes)
        log.debug('Number of candidate entities: %d',
                  self.num_candidate_entities)

        # extract the seeded entities to make complete list of entities
        seed_entities = self._unpack_sublist(
            [x[1] for _, x in self.candidate_classes_info.items()])
        self.all_entity_labels = list(
            set(self.candidate_entities + seed_entities))

        # all labels of candidate class
        self.candidate_classes_label = list(self.candidate_classes_info.keys())

        # complete list of class labels
        other_classes = self._unpack_sublist(
            [x[0] for _, x in self.candidate_classes_info.items()], depth=2)
        self.all_class_labels = list(
            set(self.candidate_classes_label + other_classes))

        # calculate embedding lookup table for class / entity labels
        if 'we_' in self.similarity_method:
            self.keyed_vectors = KeyedVectors.load_word2vec_format(
                scoring_config.getstr('word_embeddings'))
            # self.keyed_vectors.save('./output/glove_wiki_embeddings.bin')
            # self.keyed_vectors = KeyedVectors.load('./output/glove_wiki_embeddings.bin')

            self.pd_class_label_embeddings = self._calculate_label_embeddings(
                self.all_class_labels)
            self.pd_entity_label_embeddings = self._calculate_label_embeddings(
                self.all_entity_labels)

            # save_pkl(self.pd_class_label_embeddings, './output/pd_class_label_embeddings.pkl')
            # save_pkl(self.pd_entity_label_embeddings, './output/pd_entity_label_embeddings.pkl')
            # sys.exit()

            # self.pd_class_label_embeddings = load_pkl('./output/pd_class_label_embeddings.pkl')
            # self.pd_entity_label_embeddings = load_pkl('./output/pd_entity_label_embeddings.pkl')

        # do initial calculation of the scores
        self.pd_siblings_scores, self.pd_parents_scores = \
            self._calculate_initial_scores()
Example #18
def main():
    """
    Main function.
    """
    # set log, parse args, and read configuration
    set_logging(log_level=log.INFO)
    args = parse_argument()
    configparser = ConfigParser(args.config_file)

    # need to apply preprocessing
    fpm = FdcPreprocessManager(configparser.getstr('preprocess_config'))

    # read FoodOn vocabs
    labels = []
    pd_foodon_pairs = pd.read_csv('./data/FoodOn/foodonpairs.txt', sep='\t')
    labels.extend(pd_foodon_pairs['Parent'].tolist())
    labels.extend(pd_foodon_pairs['Child'].tolist())
    labels = list(set(labels))

    log.info('Number of unique labels: %d', len(labels))

    processed_labels = fpm.preprocess_column(pd.Series(labels),
                                             load_model=True).tolist()
    queries = processed_labels.copy()
    for processed_label in processed_labels:
        queries.extend(processed_label.split())
    queries = list(set(queries))

    # get summaries of the wikipedia entry
    wm = WikipediaManager()

    # check whether to reuse the previous results
    if configparser.getbool('reuse_previous'):
        prev_summary = configparser.getstr('prev_summaries_filepath')
        prev_failed = configparser.getstr('prev_failed_filepath')
    else:
        prev_summary = None
        prev_failed = None

    pd_summary, pd_failed = wm.get_summary(queries,
                                           prev_summary=prev_summary,
                                           prev_failed=prev_failed)

    # save results
    log.info('Saving successfully pulled wiki summaries to %s',
             configparser.getstr('summaries_filepath'))

    pd_summary.to_csv(configparser.getstr('summaries_filepath'),
                      sep='\t',
                      index=False)

    log.info('Saving failed wiki queries to %s',
             configparser.getstr('failed_filepath'))

    pd_failed.to_csv(configparser.getstr('failed_filepath'),
                     sep='\t',
                     index=False)

    # preprocess columns
    pd_summary['summary_preprocessed'] = fpm.preprocess_column(
        pd_summary['summary'], load_model=True)

    output_filepath = configparser.getstr('preprocessed_output')

    log.info('Saving preprocessed wikipedia data to %s...', output_filepath)
    pd_summary.to_csv(output_filepath, sep='\t', index=False)
Example #19
    print("==> Stitching frames to create final output videos")
    stitch_videos(model_output_path, cfg.paths.frames, predictions_dict)

    # Delete frame directories
    for video in predictions_dict.keys():
        directory_path = f"{model_output_path}/{video}"
        shutil.rmtree(directory_path)

    print("==> Generating confusion matrix")
    metrics.compute_confusion_matrix(predictions_dict, classes,
                                     model_output_path)

    # Upload to AWS S3 only if bucket name is given in config
    if cfg.bucket:
        print("==> Creating zip file")
        zip_videos(model_output_path, cfg.name)

        print("==> Uploading to AWS S3")
        response = upload_videos(model_output_path, cfg.name, cfg.bucket)

        if response:
            print(f"Output download link: {response}")

    total_time = datetime.datetime.now() - start_time
    print(f"Total time: {total_time.total_seconds()}")


if __name__ == "__main__":
    cfg = ConfigParser().config
    main(cfg)
Example #20
def make_recommendation(classifier,
                        pre_built_models_dir,
                        main_config,
                        model_manager,
                        num_classifiers):
    if not dir_exists(pre_built_models_dir):
        raise RuntimeError('Pre-built model directory does not exist!')

    log.info('Using pre-built model directory: %s', pre_built_models_dir)

    # load config parsers
    preprocess_config = ConfigParser(main_config.get_str('preprocess_recommender_config'))
    classifier_config = ConfigParser(main_config.get_str('classifier_config'))
    classifier_config.overwrite('classifier', classifier)

    # perform preprocessing
    X, y = model_manager.preprocess(preprocess_config, section=classifier, final_model=True)

    # select subset of features if requested
    selected_features = main_config.get_str_list('selected_features')
    if selected_features:
        log.info('Selecting subset of features: %s', selected_features)
        X = X[selected_features]

    def _revert_column(pd_data):
        # for a binary column, map each value to the other observed value
        values = list(set(pd_data.tolist()))

        replace_dict = {}
        for value in values:
            replace_dict[value] = list(filter(lambda a: a != value, values))[0]

        return pd_data.replace(to_replace=replace_dict)

    # get test data and its inverse for TRT column
    X_inv = X.copy()
    X_inv['TRT'] = _revert_column(X_inv['TRT'])
    pos_trt_idx = (X['TRT'] == 1.0)

    y_probs = []
    y_probs_inv = []
    for i in range(num_classifiers):
        log.debug('Processing classifier %d/%d', i+1, num_classifiers)

        classifier_filepath = os.path.join(pre_built_models_dir, 'model_{}.pkl'.format(i))
        log.debug('Loading classifier: %s', classifier_filepath)
        clf = load_pkl(classifier_filepath)

        y_probs.append(clf.predict_proba(X)[:, 1])
        y_probs_inv.append(clf.predict_proba(X_inv)[:, 1])

    y_probs = pd.DataFrame(y_probs).T
    y_probs.index = X.index
    y_probs_inv = pd.DataFrame(y_probs_inv).T
    y_probs_inv.index = X.index

    # make recommendation
    y_probs_avg = y_probs.mean(axis=1)
    y_probs_inv_avg = y_probs_inv.mean(axis=1)

    y_probs_avg_diff = y_probs_avg - y_probs_inv_avg
    inv_minus_pos = y_probs_inv_avg - y_probs_avg
    y_probs_avg_diff[~pos_trt_idx] = inv_minus_pos[~pos_trt_idx]

    pval = pd.Series(index=X.index)
    for index, _ in pval.items():
        _, pval[index] = ttest_rel(y_probs.loc[index], y_probs_inv.loc[index])

    # TODO: calculate y_probs_trt; for now this is y_probs
    pd_concat = pd.concat(
        [pos_trt_idx, y_probs_avg, y_probs_inv_avg, y_probs_avg_diff, pval], axis=1)
    pd_concat.columns = ['pos_trt', 'y_probs_avg', 'y_probs_inv_avg', 'y_probs_avg_diff', 'pval']

    print(pd_concat)
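
The per-sample paired t-test above compares the probabilities that the num_classifiers ensemble members assign to one sample on the original and TRT-flipped inputs. Reduced to toy numbers (the values below are illustrative only):

import numpy as np
from scipy.stats import ttest_rel

probs = np.array([0.71, 0.68, 0.74, 0.70, 0.69])      # ensemble P(y=1), TRT as-is
probs_inv = np.array([0.55, 0.52, 0.60, 0.57, 0.54])  # ensemble P(y=1), TRT flipped
stat, pval = ttest_rel(probs, probs_inv)              # paired across ensemble members
print(pval)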
Example #21
def main():
    """
    Main function.
    """
    # parse args
    args = parse_argument()

    # load main config file and set logging
    main_config = ConfigParser(args.config_file)
    set_logging(log_file=main_config.get_str('log_file'))

    # initialize model manager object
    model_manager = ModelManager()

    # perform analysis on these classifiers
    classifiers = main_config.get_str_list('classifier')

    # do prediction
    classifiers_ys = {}
    for classifier in classifiers:
        log.info('Running model for classifier \'%s\'', classifier)

        # load config parsers
        preprocess_config = ConfigParser(
            main_config.get_str('preprocess_config'))
        classifier_config = ConfigParser(
            main_config.get_str('classifier_config'))

        # perform preprocessing
        X, y = model_manager.preprocess(preprocess_config, section=classifier)

        # run classification model
        classifier_config.overwrite('classifier', classifier)

        X = model_manager.feature_selector(X, y, classifier_config)

        score_avg, score_std, ys = model_manager.run_model_cv(
            X, y, 'f1', classifier_config)

        classifiers_ys[classifier] = ys

    # plot PR curve
    fig = plt.figure()

    lines = []
    labels = []
    for classifier, ys in classifiers_ys.items():
        y_trues, y_preds, y_probs = ys

        y_probs_1 = tuple(y_prob[1].to_numpy() for y_prob in y_probs)
        line, label = plot_pr(y_trues, y_probs_1, classifier)

        lines.append(line)
        labels.append(label)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('PR Curve')
    plt.legend(lines, labels, loc='lower right', prop={'size': 8})

    save_figure(
        fig,
        os.path.join(main_config.get_str('visualization_dir'), 'pr_curve.png'))

    # plot ROC curve
    fig = plt.figure()

    lines = []
    labels = []
    for classifier, ys in classifiers_ys.items():
        y_trues, y_preds, y_probs = ys

        y_probs_1 = tuple(y_prob[1].to_numpy() for y_prob in y_probs)
        line, label = plot_roc(y_trues, y_probs_1, classifier)

        lines.append(line)
        labels.append(label)

    # plt.plot([0, 1], [0, 1], color='k', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(lines, labels, loc='lower right', prop={'size': 8})

    save_figure(
        fig,
        os.path.join(main_config.get_str('visualization_dir'),
                     'roc_curve.png'))
Example #22
    def __init__(self, config_filepath):
        configparser = ConfigParser(config_filepath)
        gt_ontology_filename = configparser.getstr('gt_entitymapping')
        self.gt_ontology = load_pkl(gt_ontology_filename)
Example #23
# -*- coding: utf-8 -*-
from db.db_worker import DBWorker
from db.sql_requests import SQLRequests
from utils.config_parser import ConfigParser
from utils.settings_parser import SettingsParser
import logging

if __name__ == "__main__":

    config = ConfigParser().get_config_settings()
    settings = SettingsParser().get_test_settings()

    logging.basicConfig(filename=config['log_filename'],
                        level=logging.INFO,
                        format=config['log_format'])
    logger = logging.getLogger()

    db_worker = DBWorker(config['host'], config['user'], config['password'],
                         config['database'])
    sql_request = SQLRequests(db_worker, settings)

    try:
        db_worker.connect()

        logger.info('Step 1')

        sql_request.get_min_working_time()

        logger.info('-' * 200)

        logger.info('Step 2')
Example #24
import click
from modules.executor import Executor
from utils.config_parser import ConfigParser

json_data = ConfigParser(
    path="/home/vlad/infra/armature/armature/conf/modules.json").return_json()

MODULE = "packer"


@click.group()
def cli():
    pass


@cli.command()
def prepare_template():
    """Validate configuration file"""
    click.echo('prepare_template')

    with Executor(module=MODULE, cli="prepare_template") as cli_executor:
        cli_executor.run(cli="prepare_template", use_docker_run_wrapper=True)


@cli.command()
def validate_template():
    """Validate configuration file"""
    click.echo('validate_template')

    with Executor(module=MODULE, cli="validate_template") as cli_executor:
        cli_executor.run(cli="validate_template", use_docker_run_wrapper=True)
Example #25
def main():
    """
    Main function.
    """
    # parse args
    args = parse_argument()

    # load main config file and set logging
    main_config = ConfigParser(args.config_file)
    set_logging(log_file=main_config.get_str('log_file'))

    # initialize model manager object
    model_manager = ModelManager()

    # run models for all possible combination of preprocessing
    scale_modes = main_config.get_str_list('scale_mode')
    mvi_modes = main_config.get_str_list('mvi_mode')
    outlier_modes = main_config.get_str_list('outlier_mode')
    classifiers = main_config.get_str_list('classifier')

    classifier_score_dict = {classifier: 0 for classifier in classifiers}
    classifier_best_combination_dict = {
        classifier: None
        for classifier in classifiers
    }
    all_combinations = [scale_modes, mvi_modes, outlier_modes, classifiers]
    all_combinations = list(itertools.product(*all_combinations))
    failed_combinations = []

    for idx, combination in enumerate(all_combinations):
        # unpack the tuple
        scale_mode = combination[0]
        mvi_mode = combination[1]
        outlier_mode = combination[2]
        classifier = combination[3]

        # log current combination
        combination_str_joined = ', '.join(list(combination))
        log.info('Running grid search %d/%d: (%s)', idx + 1,
                 len(all_combinations), combination_str_joined)

        # some classifiers must use minmax scaler
        if (classifier in ['MultinomialNB', 'CategoricalNB']
                and scale_mode != 'minmax'):
            log.info('Skipping this combination...')
            continue

        # overwrite the config file using the current combination
        preprocess_config = ConfigParser(
            main_config.get_str('preprocess_config'))
        classifier_config = ConfigParser(
            main_config.get_str('classifier_config'))

        preprocess_config.overwrite('scale_mode', scale_mode)
        preprocess_config.overwrite('mvi_mode', mvi_mode)
        preprocess_config.overwrite('outlier_mode', outlier_mode)
        classifier_config.overwrite('classifier', classifier)

        # perform preprocessing
        X, y = model_manager.preprocess(preprocess_config)

        # run classification model
        try:
            score = model_manager.grid_search(
                X, y, main_config.get_str('optimize_scoring'),
                classifier_config,
                main_config.get_str('updated_classifier_config'))
        except (IndexError, ValueError) as e:
            failed_combinations.append(combination_str_joined)
            log.error(e)
            continue

        # update the best preprocessing combination
        if classifier_score_dict[classifier] < score:
            classifier_score_dict[classifier] = score
            classifier_best_combination_dict[classifier] = \
                combination_str_joined

    log.info('Best %s score for each classifier: %s',
             main_config.get_str('optimize_scoring'), classifier_score_dict)

    log.info(
        'Preprocessing combination of the best %s score for each classifier: %s',
        main_config.get_str('optimize_scoring'),
        classifier_best_combination_dict)

    log.info('%d failed combinations: %s', len(failed_combinations),
             failed_combinations)
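
For reference, itertools.product expands the four mode lists into every (scale_mode, mvi_mode, outlier_mode, classifier) tuple unpacked at the top of the loop; the mode values below are placeholders:

import itertools

scale_modes = ['minmax', 'standard']
mvi_modes = ['mean']
outlier_modes = ['none', 'iqr']
classifiers = ['LogisticRegression', 'MultinomialNB']

all_combinations = list(
    itertools.product(scale_modes, mvi_modes, outlier_modes, classifiers))
print(len(all_combinations))  # 2 * 1 * 2 * 2 = 8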