Example #1
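A driver for a relation-extraction experiment: it caches the train and test WLPDataset objects with pickle, combines several hand-crafted feature groups into one feature set, trains and scores a model per set via single_run, and finishes with evaluation.main().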
def main():
    parameters_maxent = parse_args()
    # print(parameters_maxent)

    output_dir = parameters_maxent["output_directory"]

    # train = WLPDataset(gen_rel_feat=True, prep_emb=False, dir_path=parameters_maxent["train_data"])
    # test = WLPDataset(gen_rel_feat=True, prep_emb=False, dir_path=parameters_maxent["test_data"])

    shutil.rmtree(output_dir, ignore_errors=True)

    os.makedirs(output_dir, exist_ok=True)

    # Cache the parsed datasets with pickle so repeated runs skip the
    # expensive feature generation.
    try:
        with open(cfg.Train_Dataset_PICKLE, 'rb') as f:
            train = pickle.load(f)
    except Exception:
        train = WLPDataset(gen_rel_feat=True,
                           prep_emb=False,
                           dir_path=parameters_maxent["train_data"])
        with open(cfg.Train_Dataset_PICKLE, 'wb') as f:
            pickle.dump(train, f)

    try:
        with open(cfg.Test_Dataset_PICKLE, 'rb') as f:
            test = pickle.load(f)
    except Exception:
        test = WLPDataset(gen_rel_feat=True,
                          prep_emb=False,
                          dir_path=parameters_maxent["test_data"])
        with open(cfg.Test_Dataset_PICKLE, 'wb') as f:
            pickle.dump(test, f)

    train_df, y_train = train.extract_rel_data()
    test_df, y_test = test.extract_rel_data()

    word_features = [
        'wm1', 'wbnull', 'wbf', 'wbl', 'wbo', 'bm1f', 'bm1l', 'am2f', 'am2l'
    ]
    ent_features = ['et12']
    overlap_features = ['#mb', '#wb']
    chunk_features = [
        'cphbnull', 'cphbfl', 'cphbf', 'cphbl', 'cphbo', 'cphbm1f', 'cphbm1l',
        'cpham2f', 'cpham2l'
    ]
    dep_features = [
        'et1dw1', 'et2dw2', 'h1dw1', 'h2dw2', 'et12SameNP', 'et12SamePP',
        'et12SameVP'
    ]

    # Each entry of `addition` is one feature set to evaluate; here all
    # feature groups are combined into a single set.
    addition = [
        word_features + ent_features + overlap_features + chunk_features +
        dep_features,
    ]

    for feat in addition:
        # 'tranform' mirrors the method name as defined on the dataset's
        # feature extractor; the test split is transformed with the
        # extractor fitted on the training data.
        x_train = train.features.tranform(train_df, feat)
        x_test = train.features.tranform(test_df, feat)
        single_run(train, x_train, y_train, test, x_test, y_test, output_dir)

    # evaluation.find_perfomance(gold_data_location=parameters_maxent["test_data"], pred_data_location=output_dir)
    evaluation.main()
Example #2
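Two variants of a topic-modeling pipeline driver; both run preprocessing, corpus building, modeling, postprocessing, and visualization before calling evaluation.main(). The second variant takes the language and heatmap categories as explicit parameters.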
def main(workdir, dataset, identifier):
    # numtopics and passes are presumably module-level settings in the
    # original script; the variant below takes them as explicit parameters.
    helpers.make_dirs(workdir, identifier)
    preprocessing.main(workdir, dataset, identifier)
    build_corpus.main(workdir, identifier)
    modeling.main(workdir, identifier, numtopics, passes)
    postprocessing.main(workdir, dataset, identifier, numtopics)
    make_overview.main(workdir, identifier)
    make_heatmap.main(workdir, identifier)
    make_wordclouds.main(workdir, identifier, numtopics)
    evaluation.main(workdir, identifier, numtopics)

def main(workdir, dataset, identifier, numtopics, passes, lang, cats):
    print("==", "starting", "==", "\n==", helpers.get_time(), "==")
    helpers.make_dirs(workdir, identifier)
    preprocessing.main(workdir, dataset, identifier, lang)
    build_corpus.main(workdir, identifier)
    modeling.main(workdir, identifier, numtopics, passes)
    postprocessing.main(workdir, dataset, identifier, numtopics)
    make_overview.main(workdir, identifier)
    make_heatmap.main(workdir, identifier, cats)
    make_wordclouds.main(workdir, identifier, numtopics)
    evaluation.main(workdir, identifier, numtopics)
Example #4
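A command-line entry point with two positional arguments (dataset and mode) that dispatches to prediction.main or evaluation.main for the AFNet datasets.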
def main():
    parser = argparse.ArgumentParser(
        description="Code for the prediction and evaluation of AFNet.")
    parser.add_argument("dataset",
                        type=str,
                        choices=["potsdam", "vaihingen"],
                        help="choose dataset to do prediction or evaluation.")
    parser.add_argument("mode",
                        type=str,
                        choices=["prediction", "evaluation"],
                        help="whether do prediction or evaluation.")
    args = parser.parse_args()

    if args.mode == 'prediction':
        prediction.main(args.dataset)
    elif args.mode == 'evaluation':
        evaluation.main(args.dataset)
Example #5
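A one-line wrapper exposing evaluation.main() as an inference function.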
def inference(image):
    return evaluation.main(image)
Example #6
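A fragment, truncated at the top, of a cross-validation script: it writes per-instance probabilities of the positive class to an answers file, scores them against a truth file with evaluation.main, and then builds the training feature arrays.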
                idx_Y = np.where(classifier.classes_ == 1)
            else:
                idx_Y = np.where(classifier.classes_ == "Y")

            # for each instance, output instance name & probability of "Y" class
            for name, prob in dict(zip(names_test, probabilities)).items():
                prob_Y = prob[idx_Y]
                log.debug(prob_Y)
                answers.write("%s %f\n" % (name, prob_Y))

        log.info(u"Wrote answers to '{0:s}'".format(answers_path))
        answers.close()

        # find truth file and evaluate results
        truth_path = os.path.join(args.training, "truth.txt")
        print(evaluation.main(truth_path, answers_path), file=sys.stderr)

        # and done
        log.info("End after cross validation.")
        sys.exit(0)  # End after CV.

    elif args.training:  # generate train & test instance arrays from train & test
        log.info("Running on {}".format(args.training))
        test = test_names = test_labels = None

        train, _, train_labels = build_dataset(args.training)
        train = __scale_features(train)
        log.debug(train)
        log.info("Length of feature set: {}".format(len(train[0])))
        log.debug('TRAIN LABELS:')
        log.debug(train_labels)
Example #7
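A dispatcher that executes one node of an experiment graph; the evaluation_measure branch calls evaluation.main and records the output paths for a LaTeX report.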
def execute(node, previous, experiment_folder):
    """
    Execute a task defined by the given node in the experiment graph.
    
    Parameters
    ----------
    
    node : Element
        The node to be executed.
        
    previous : dict (or list of dict)
        Dictionary of the experiment's running-time variables after the
        end of the parent node's execution.
        May be a list of dictionaries in the special case of a fusion node,
        which has more than one parent.
    
    experiment_folder : string
        String with the path to the experiment folder, where the files of the
        experiment will be saved.
        
    Returns
    -------
    
    exp_param : dict
        The updated dictionary of the experiment's running-time variables after
        the node's execution.
    
    """

    global execution_time
    global tex_path
    global tex_dict
    global openset_experiment

    exp_param = previous
    parameters = ast.literal_eval(node.get("parameters"))
    node_id = node.attrib['id']

    #Get node name
    node_name = node.get('name')

    if node.tag == "collection":
        print "Collection", exp_param.keys()

        images, classes, extract_path, read_time = \
                read_collection.main(node_name, openset_experiment, parameters,
                node_id)
        execution_time += read_time

        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['extract_path'] = extract_path

    elif node.tag == "train_test_method":
        print "train_test_method", exp_param.keys()

        images = exp_param['images']
        classes = exp_param['classes']

        images, classes, train_test_list, train_test_time = \
                train_test.main(images, classes, experiment_folder, node_name,
                parameters, openset_experiment, node_id)
        execution_time += train_test_time

        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['train_test_list'] = train_test_list

        exp_param['train_test_method'] = node_name
        exp_param['train_test_parameters'] = parameters

    elif node.tag == "descriptor":
        print "descriptor", exp_param.keys()

        images = exp_param['images']
        extract_path = exp_param['extract_path']
        classes_keys = exp_param['classes'].keys()

        if node_name == "bag":
            train_test_list = exp_param['train_test_list']

            images, extract_time = extract_bag.main(images, train_test_list,
                                                    extract_path,
                                                    experiment_folder,
                                                    parameters, node_id)

        elif node_name == "bovg":
            train_test_list = exp_param['train_test_list']

            images, extract_time = extract_bovg.main(images, train_test_list,
                                                     extract_path,
                                                     experiment_folder,
                                                     parameters, node_id)

        else:
            images, extract_time = extract_features.main(
                images, classes_keys, extract_path, node_name, parameters,
                node_id)

        execution_time += extract_time

        exp_param['images'] = images
        exp_param['descriptor'] = node_name

    elif node.tag == "normalizer":
        try:
            manager = Manager()
            images = manager.dict(exp_param['images'])
            train_test_list = exp_param['train_test_list']
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        norm_fv_paths, normalize_time = normalize_features.main(
            images, train_test_list, experiment_folder, node_name, parameters,
            node_id)
        execution_time += normalize_time

        del exp_param['images']
        exp_param['fv_paths'] = norm_fv_paths

    elif node.tag == "classifier":
        try:
            classes = exp_param['classes']
            train_test_list = exp_param['train_test_list']
            descriptor = exp_param['descriptor']
            try:
                fv_paths = exp_param['fv_paths']
                del exp_param['fv_paths']
            except:
                images = exp_param['images']
                fv_paths = util.save_file_extract(images, train_test_list,
                                                  experiment_folder)
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        images, classes_list, classify_time = classify.main(
            fv_paths, classes.keys(), train_test_list, experiment_folder,
            node_name, parameters, descriptor, node_id)
        execution_time += classify_time

        exp_param['images'] = images
        exp_param['classes_list'] = classes_list

    elif node.tag == "fusion_method":
        len_exp_param = len(exp_param)
        #list with the images dictionaries, classes dictionaries, and train and
        # test set list
        list_images = []
        list_classes = []
        list_train_test = []
        extract_path = exp_param[INDEX_ZERO]['extract_path']

        for index in range(len_exp_param):
            try:
                list_images.append(exp_param[index]['images'])
            except:
                images = {}
                for fv_path in exp_param[index]['fv_paths']:
                    print "fv_path:", fv_path
                    images_new = util.read_fv_file(fv_path)
                    images = util.merge_dict(images, images_new)
                list_images.append(images)

            list_classes.append(exp_param[index]['classes'])
            #In case that it performs the fusion of collections, there is no
            # train_test_list
            try:
                list_train_test.append(exp_param[index]['train_test_list'])
            except:
                list_train_test.append(None)
        #classes_list is present only after the classification module
        try:
            classes_list = exp_param[INDEX_ZERO]['classes_list']
        except:
            classes_list = None
        try:
            train_test_method = exp_param[INDEX_ZERO]['train_test_method']
            train_test_parameters = exp_param[INDEX_ZERO][
                'train_test_parameters']
        except:
            train_test_method = None
            train_test_parameters = None

        images, classes, train_test_list, fusion_time = \
                fusion.main(list_images, list_classes, list_train_test,
                        classes_list, experiment_folder, node_name, parameters,
                        node_id)
        execution_time += fusion_time

        exp_param = {}
        exp_param['images'] = images
        exp_param['classes'] = classes
        if train_test_list is not None:
            exp_param['train_test_list'] = train_test_list
        if classes_list is not None:
            exp_param['classes_list'] = classes_list
        if train_test_method is not None:
            exp_param['train_test_method'] = train_test_method
            exp_param['train_test_parameters'] = train_test_parameters
        exp_param['descriptor'] = None
        exp_param['extract_path'] = extract_path

    elif node.tag == "evaluation_measure":
        try:
            images = exp_param['images']
            train_test_list = exp_param['train_test_list']
            classes_list = exp_param['classes_list']
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        evaluation_time, evaluation_path = evaluation.main(
            images, train_test_list, classes_list, experiment_folder,
            node_name, parameters, node_id)
        execution_time += evaluation_time

        #Dictionaries to create the tex file
        train_test_method = exp_param['train_test_method']
        train_test_parameters = str(exp_param['train_test_parameters'])

        if train_test_method not in tex_dict:
            tex_dict[train_test_method] = {}
        train_test_dict = tex_dict[train_test_method]

        if train_test_parameters not in train_test_dict:
            train_test_dict[train_test_parameters] = {}
        output_dict = train_test_dict[train_test_parameters]

        if node_name not in output_dict:
            output_dict[node_name] = []
        list_output = [evaluation_path, classes_list[0], node_id]
        if list_output not in output_dict[node_name]:
            output_dict[node_name].append(list_output)

        train_test_dict[train_test_parameters] = output_dict
        tex_dict[train_test_method] = train_test_dict

    elif node.tag == "preprocessing":
        images = exp_param['images']
        classes = exp_param['classes']

        images, classes, preprocessing_time = preprocessing.main(
            images, classes, experiment_folder, node_name, parameters, node_id)
        execution_time += preprocessing_time

        exp_param['images'] = images
        exp_param['classes'] = classes

    else:
        print "Error. Unknown Tag."
        sys.exit(1)

    return exp_param
Example #8
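A bug-localization pipeline that runs each stage in sequence and finishes with evaluation.main().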
import preprocessing
import vsm_similarity
import token_matching
import stack_trace
import semantic_similarity
import fixed_bug_reports
import evaluation

print('Parsing & Preprocessing...')
preprocessing.main()

print('Token Matching...')
token_matching.main()

print('VSM Similarity...')
vsm_similarity.main()

print('Stack Trace...')
stack_trace.main()

print('Semantic Similarity...')
semantic_similarity.main()

print('Fixed Bug Reports...')
fixed_bug_reports.main()

print('Evaluating...')
evaluation.main()
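
The same example continues with a retrofitting routine that injects synonym and antonym constraints into pretrained word vectors and periodically scores them with evaluation.main: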
def retrofit(wordVec,
             newWordVecs,
             dim,
             syn_lexicon,
             ant_lexicon,
             numIters=2,
             starting_alpha=1.0,
             lamda=0.5):
    wvVocab = set(wordVec.keys())
    dim = int(dim)
    loop_synVocab = set()
    loop_antVocab = set()
    for w in syn_lexicon.keys():
        if w.split('#')[0] in wvVocab:
            loop_synVocab.add(w)
    for w in ant_lexicon.keys():
        if w.split('#')[0] in wvVocab:
            loop_antVocab.add(w)

    word_count = len(loop_synVocab)
    global_step = 0
    for it in range(numIters):
        count = 0
        for word in loop_synVocab:
            count += 1
            global_step += 1
            wordNeighbours = set(syn_lexicon[word])
            numNeighbours = len(wordNeighbours)
            # no neighbours, pass - use data estimate
            if numNeighbours == 0:
                continue
            # Learning rate: decayed over iterations, split across the
            # neighbours, and floored so it never reaches zero.
            alpha = starting_alpha * (1 - it / numIters) / numNeighbours
            if alpha < starting_alpha * 0.00001:
                alpha = starting_alpha * 0.00001

            #negative sampling
            classifiers = [(word.split('#')[0], 1)]
            if word in loop_antVocab:
                classifiers += [
                    (target, 0)
                    for target in set(ant_lexicon[word]).intersection(wvVocab)
                ]

            neu1 = np.zeros(dim)
            #neu = np.zeros(dim)
            for nbWord in wordNeighbours:
                #estimate the vector which word not exist in the pretrained vectors
                if nbWord not in wvVocab:
                    wordVec[nbWord] = np.random.uniform(low=-0.5 / dim,
                                                        high=0.5 / dim,
                                                        size=(dim))
                    #adjust the random vector by synonyms already appeared
                    wordVec[nbWord] = lamda * wordVec[nbWord] + (
                        1 - lamda) * wordVec[word.split('#')[0]]
                    newWordVecs[nbWord] = wordVec[nbWord]
                # accumulate the contributions from neighbours
                neu1 += wordVec[nbWord]
            neu1e = np.zeros(dim)
            for target, label in classifiers:
                z = np.dot(neu1, newWordVecs[target])
                p = sigmoid(z)
                g = alpha * (label - p)
                neu1e += g * newWordVecs[target]
                newWordVecs[target] += g * neu1  # Update syn1

            for nbWord in wordNeighbours:
                #update synonyms
                wordVec[nbWord] += neu1e
            #update the word vector which with the definite sense
            wordVec[word] = (wordVec[word.split('#')[0]] +
                             neu1) / (1 + numNeighbours)

            sys.stdout.write(
                "\rIteration: No. %d Alpha: %f Progress: %d of %d (%.2f%%)" %
                ((it + 1), alpha, count, word_count,
                 count / word_count * 100))
            sys.stdout.flush()

        if (it + 1) % 10 == 0:
            # eval_data, eval_verb, f_sl, and f_sv are module-level globals
            # in the original script (evaluation datasets and result files).
            print('num of iteration:\n', str(it + 1))
            print('simlex_999:\n')
            sl_max, sl_avg = evaluation.main(wordVec, eval_data, dim)
            f_sl.write(
                str(it) + '\t' + str(round(sl_max + 0.00001, 3)) + '\t' +
                'NS-sv-sumMax' + '\n')
            f_sl.write(
                str(it) + '\t' + str(round(sl_avg + 0.00001, 3)) + '\t' +
                'NS-sv-sumMean' + '\n')
            print('simver_3500:\n')
            sv_max, sv_avg = evaluation.main(wordVec, eval_verb, dim)
            f_sv.write(
                str(it) + '\t' + str(round(sv_max + 0.00001, 3)) + '\t' +
                'NS-sv-sumMax' + '\n')
            f_sv.write(
                str(it) + '\t' + str(round(sv_avg + 0.00001, 3)) + '\t' +
                'NS-sv-sumMean' + '\n')

    return wordVec
Example #11
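A fragment that evaluates a sequence of saved model checkpoints with evaluation.main and logs steering and collision metrics for each.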
# Assumed reconstruction: the snippet is truncated above this point (imports
# are likewise missing); numericalSort is a natural-sort key that splits
# file names on digit runs so checkpoints order numerically.
numbers = re.compile(r'(\d+)')

def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

#folder = "./model/three_block_model"
folder = sys.argv[1]

steering_dict = []
collision_dict = []

logz.configure_output_dir(folder)

for infile in sorted(glob.glob(folder+"/model_weights*"), key=numericalSort):
    #print("Current File Being Processed is: " + infile)
    infile = infile.replace(folder+"/",'')
    #print("=================After replacing", infile)
    steering, collision = evaluation.main(
        ["evaluation.py", "--experiment_rootdir=" + folder,
         "--weights_fname=" + infile,
         "--test_dir=../../dronet/datasets/testing"])
    print("Steering--------:", steering)
    print("Collision-------:", collision)
    print(collision[0]['ave_accuracy'])
    logz.log_tabular('classification accuracy', collision[0]['ave_accuracy'])
    logz.log_tabular('RMSE', steering[0]['rmse'])
    logz.log_tabular('EVA', steering[0]['evas'])
    logz.log_tabular('F-Score', collision[0]['f_score'])
    logz.dump_tabular()
    # steering_dict.append(steering)
    # collision_dict.append(collision)

#print("STEERING: ", steering)
#print("COLLISION: ", collision)