Example #1
    def setup(save_path, layers=[1,2,3], curve_num=5000, chunk_size=1000, noisy=False, xray=False,
              show_plots=True, generate_data=True, train_classifier=True, train_regressor=True,
              classifier_epochs=2, regressor_epochs=2):
        """Sets up the pipeline for predictions on .dat files by generating data and training.

        Args:
            save_path (string): a path to the directory where data and models will be saved.
            layers (list): a list of layers to generate and train for.
            curve_num (int): the number of curves to generate per layer.
            chunk_size (int): the size of chunks to use in the h5 storage of images for curves.
            noisy (Boolean): whether to add noise to generated data.
            xray (Boolean): whether to use an x-ray probe or not.
            show_plots (Boolean): whether to display classification confusion matrix and regression plots or not.
            generate_data (Boolean): whether to generate data or use existing data.
            train_classifier (Boolean): whether to train the classifier or not.
            train_regressor (Boolean): whether to train the regressors or not.
            classifier_epochs (int): the number of epochs to train the classifier for.
            regressor_epochs (int): the number of epochs to train the regressor for.

        """
        if generate_data:
            print("-------------- Data Generation ------------")
            for layer in layers: #Generate curves for each layer specified.
                print(">>> Generating {}-layer curves".format(layer))
                if xray: #Generate data using x-ray probe.
                    structures = XRayGenerator.generate(curve_num, layer)
                    XRayGenerator.save(save_path + "/data", LAYERS_STR[layer], structures, noisy=noisy)
                else: #Generate data using neutron probe.
                    structures = NeutronGenerator.generate(curve_num, layer)
                    NeutronGenerator.save(save_path + "/data", LAYERS_STR[layer], structures, noisy=noisy)

                print(">>> Creating images for {}-layer curves".format(layer))
                save_path_layer = data_path_layer = save_path + "/data/{}".format(LAYERS_STR[layer])
                #Create images for the generated curves, ready for input to the classifier and regressors.
                generate_images(data_path_layer, save_path_layer, [layer], xray=xray, chunk_size=chunk_size, display_status=False)

            layers_paths = [save_path + "/data/{}".format(LAYERS_STR[layer]) for layer in layers]
            merge(save_path + "/data", layers_paths, display_status=False) #Merge the curves for each layer for classification.

        print("\n-------------- Classification -------------")
        if train_classifier:
            print(">>> Training classifier")
            classify(save_path + "/data/merged", save_path, train=True, epochs=classifier_epochs, show_plots=show_plots) #Train the classifier.
        else:
            print(">>> Loading classifier")
            load_path = save_path + "/classifier/full_model.h5" #Load a classifier.
            classify(save_path + "/data/merged", load_path=load_path, train=False, show_plots=show_plots)

        print("\n---------------- Regression ---------------")
        for layer in layers: #Train or load regressors for each layer that we are setting up for.
            data_path_layer = save_path + "/data/{}".format(LAYERS_STR[layer])
            if train_regressor:
                print(">>> Training {}-layer regressor".format(LAYERS_STR[layer]))
                regress(data_path_layer, layer, save_path, epochs=regressor_epochs, show_plots=show_plots, xray=xray) #Train the regressor.
            else:
                print(">>> Loading {}-layer regressor".format(LAYERS_STR[layer]))
                load_path_layer = save_path + "/{}-layer-regressor/full_model.h5".format(LAYERS_STR[layer]) #Load an existing regressor.
                regress(data_path_layer, layer, load_path=load_path_layer, train=False, show_plots=show_plots, xray=xray)
            print()
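A minimal usage sketch for the setup() pipeline above; the save directory, layer list and hyperparameters below are placeholders, not values taken from the original project.

# Hypothetical invocation of setup(); paths and settings are examples only.
setup("./neutron-run",
      layers=[1, 2],           # generate and train for 1- and 2-layer curves
      curve_num=1000,          # smaller than the default for a quick dry run
      noisy=True,              # add simulated noise to the generated data
      classifier_epochs=5,
      regressor_epochs=5)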
def main(model_name, model_type, model_opts, data_dir, iteration, sep_direction=True, test_aug=False, description=None):
    results = {}
    for key in [pm.D_ASCENDING, pm.D_DESCENDING]:
        results[key] = np.zeros((pm.NUM_CLASS, pm.NUM_CLASS), dtype=int)
    if description is not None:
        print('Description: {}'.format(description))
    audio_dir = os.path.join(data_dir, 'audio')
    mc_dir = os.path.join(data_dir, 'melody')
    model_class = getattr(models, model_type)
    param_set = getattr(pm, model_opts)
    output_dir = clf.output_dir
    clf.model_dir = os.path.join(clf.model_dir, model_name)
    clf.output_dir = os.path.join(clf.output_dir, model_name)
    if not os.path.isdir(clf.model_dir):
        os.mkdir(clf.model_dir)
    if not os.path.isdir(clf.output_dir):
        os.mkdir(clf.output_dir)
    ### load and pre-process input features
    feature_bank = clf.load_n_preprocess_input_feature(audio_dir, mc_dir, model_class, sep_direction)
    # np.save('feature_bank_spec+mc.npy', feature_bank)
    # feature_bank = np.load('feature_bank_mfcc.npy').item()
    print('Run {} iterations.'.format(iteration))
    for i in range(iteration):
        print('iteration: {}'.format(i))
        cm = clf.classify(feature_bank, model_name + '_' + str(i), model_class, param_set, sep_direction=True, test_aug=False)
        for key in cm:
            if key in results:
                results[key] += cm[key]
    for key in results:
        print('Final result of {}'.format(key))
        csv_fn = 'evaluation.' + key + '.csv'
        save_fp = os.path.join(output_dir, model_name, csv_fn)
        clf.eval_scores(results[key], key, print_scores=True, save_fp=save_fp)
def csvToJson(path):
    CSV_FILE_PATH = path
    filename = Path(path).stem
    JSON_FILE_PATH = filename + '.json'

    data = []
    with open(CSV_FILE_PATH) as csvFile:
        csvReader = csv.DictReader(csvFile)
        for rows in csvReader:
            if (not rows['Longitude'] or not rows['Latitude']):
                continue

            date = rows['Month']
            year = int(date[:4])
            month = int(date[5:7])

            category = classify(rows['Crime type'])

            entry = {
                'timestamp': datetime.datetime(year, month, 1).timestamp(),
                'longitude': rows['Longitude'],
                'latitude': rows['Latitude'],
                'crimeType': rows['Crime type'],
                'category': category
            }
            data.append(entry)

    with open(JSON_FILE_PATH, 'w') as jsonFile:
        jsonFile.write(json.dumps(data, indent=2))
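A short usage sketch, assuming a UK-police-style export with Month, Longitude, Latitude and Crime type columns; the filename, along with the csv, json, datetime and pathlib imports and the classify() helper the function relies on, are assumptions about the surrounding module rather than part of the snippet.

# Hypothetical call: reads street_crimes.csv and writes street_crimes.json
# in the working directory, one entry per geolocated row.
csvToJson('street_crimes.csv')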
Example #4
def main(cfg):
    try:
        # nltk.download("vader_lexicon")
        # nltk.download('wordnet')
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        total_files = len(configs)
        results = {}
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            dataset_dir = normalize()
            X, y = extract_features(dataset_dir)
            config_result = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
            glbs.RESULTS[glbs.FILE_NAME] = config_result
            glbs.RESULTS = add_results(glbs.RESULTS, glbs)
            if glbs.EXPORT_AS_BASELINE:
                export_as_baseline(config_result, config[1])
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        add_results_glbs(results, glbs)
        write_results(divide_results(glbs.RESULTS))
        send_work_done(glbs.DATASET_DIR)
        print_message("Done!")
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.DATASET_DIR,
                       "",
                       error=str(e),
                       traceback=str(traceback.format_exc()))
Example #5
def masking(image, emoji_points, detector, predictor, model, transforms):
    tmp = process_image(image, detector, predictor)
    if tmp is None:
        return None
    shape0, rect = tmp
    # im = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    im = image[max(rect.top(), 0):rect.bottom() + 25,
               max(rect.left(), 0):rect.right()]
    im = PIL.Image.fromarray(im)
    im = transforms(im)
    im = torch.unsqueeze(im, 0)
    label = classification.classify(model, im)
    emoji = cv2.imread("./emojis/" + label + ".png")
    # emoji = cv2.imread("./emojis/" + "smile" + ".png")
    image_pts = np.array([(shape0[51, :]), (shape0[8, :]), (shape0[36, :]),
                          (shape0[45, :]), (shape0[48, :]), (shape0[54, :])],
                         dtype="double")
    emoji, emoji_pts = process_emoji(emoji, emoji_points)
    homography, stats = cv2.findHomography(emoji_pts, image_pts)
    warped = cv2.warpPerspective(emoji, homography,
                                 (image.shape[1], image.shape[0]))
    warped_circle = cv2.warpPerspective(circle_mask(emoji.shape[0],
                                                    10), homography,
                                        (image.shape[1], image.shape[0]))
    warped_circle[warped_circle > 100] = 255.0
    warped_circle[warped_circle <= 100] = 0
    warped_circle = -1 * (warped_circle - 255)
    mask_warped = np.stack([warped_circle] * 3, axis=2)
    image = np.where(mask_warped, image, warped)
    # cv2.rectangle(image, (rect.left(), rect.top()), (rect.right(), rect.bottom()), (255, 0, 0), 5)
    return image
Example #6
def run_single_test(data_dir, output_dir):
    from classification import train_classifier, classify
    from keras import backend as K
    from keras.models import load_model
    from os import environ
    from os.path import abspath, dirname, join

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    train_gt = read_csv(join(train_dir, 'gt.csv'))
    train_img_dir = join(train_dir, 'images')

    train_classifier(train_gt, train_img_dir, fast_train=True)

    code_dir = dirname(abspath(__file__))
    print('loading model...')
    model = load_model(join(code_dir, 'birds_model.hdf5'))
    print('loaded')
    test_img_dir = join(test_dir, 'images')
    img_classes = classify(model, test_img_dir)
    save_csv(img_classes, join(output_dir, 'output.csv'))

    if environ.get('KERAS_BACKEND') == 'tensorflow':
        K.clear_session()
Example #7
    def insertCrashReport(self, crash_report, commit=False, todict=True):
        """Classify crash_report, insert it into the database, and return the stored record."""
        crash_report = crash_report['crash_report']

        # Find crash group
        crash_group_id = classify(crash_report)
        try:
            crash_group = self.session.query(CrashGroups).\
                filter(CrashGroups.id == crash_group_id).one()
        except:
            print("Clusterization error")
            exit(1)

        # Find application data
        app_dict = crash_report['application']
        try:
            application = self.session.query(Applications).\
                filter(Applications.name == app_dict['name']).\
                filter(Applications.version == app_dict['version']).\
                one()
        except NoResultFound:
            application = self.instertApplication(app_dict)

        # Find system_info data
        sys_info_dict = crash_report['system_info']
        try:
            system_info = self.session.query(SystemInfo).\
                filter(SystemInfo.version == sys_info_dict['version']).\
                one()
        except NoResultFound:
            system_info = self.insertSystemInfo(sys_info_dict)

        result = CrashReports(
            exit_code=crash_report['exit_code'],
            stderr_output=crash_report['stderr_output'],
            crash_group=crash_group,
            application=application,
            system_info=system_info
        )
        self.session.add(result)
        if commit:
            self.session.commit()
        if todict:
            return {
                'crash_report_ack': {
                    'crash_report_id': result.id,
                    'crash_report_url': 'vd1/crash_reports/' + str(result.id),
                    'crash_group_id': result.crash_group.id,
                    'crash_group_url': ('vd1/crash_groups/' +
                                        str(result.crash_group.id)),
                    'solution': {
                        "solution_id": result.crash_group.solution.id,
                        "solution_url": ('vd1/solutions/' +
                                         str(result.crash_group.solution.id)),
                        "shell_script": result.crash_group.solution.details
                    }
                }
            }
        else:
            return result
Example #8
def ranking_with_classifier(train_corpus,
                            test_corpus,
                            train_rels,
                            topics,
                            p,
                            ix,
                            alpha1=0.5,
                            alpha2=0.5):
    lst = []
    for topic_id in topics:
        results = ranking(topic_id, p, ix, "TF-IDF")
        results = [(el[0], el[1] / results[0][1]) for el in results]
        new_corpus = []
        for id1 in (el[0] for el in results):
            for el in test_corpus:
                if el[0] == id1:
                    new_corpus.append((id1, el[1]))

        topic = process_topic(topic_id, topic_directory)
        model = training(topic,
                         train_corpus,
                         train_rels,
                         model=KNeighborsClassifier(n_neighbors=25,
                                                    metric="euclidean"))
        classes = [
            classify(new_corpus[i][1], topic, model)
            for i in range(len(new_corpus))
        ]
        results = [(el[0], el[1] * alpha1 + classes[i] * alpha2)
                   for i, el in enumerate(results)]
        results.sort(reverse=True, key=lambda x: x[1])
        lst.append(results)
    return lst
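The re-ranking above blends the TF-IDF retrieval score (normalised by the top result's score) with the classifier output via the two alpha weights. A standalone sketch of that fusion, using illustrative names that do not appear in the original code:

# Illustrative only: combine a normalised retrieval score with a classifier
# score using the alpha1/alpha2 weights from ranking_with_classifier().
def fuse_scores(tfidf_score, top_score, clf_score, alpha1=0.5, alpha2=0.5):
    return (tfidf_score / top_score) * alpha1 + clf_score * alpha2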
    def bulk_insert(self):  # insert all documents from the corpus
        model = classification.train()

        actions = []
        for i in range(1, 13):
            with open("corpusi/reddit/month" + str(i) + ".csv", 'r') as datafile:
                csv_file_obj = csv.reader(datafile)
                csv.field_size_limit(500 * 1024 * 1024)
                for row in csv_file_obj:
                    try:
                        v = {'body': '\n'.join(row),
                             'language': classification.classify(model, row)}
                    except UnicodeDecodeError:
                        print("unicode error")
                        continue  # skip rows that cannot be decoded

                    actions.append(self.format_comment(v))  # add new document into array
        #now parse and insert the documentation, python then java
        python_dict = parse_python_docs()
        for key in python_dict:
            name = key
            document = python_dict[key]
            for method in document:
                description = document[method]
                actions.append(self.format_documentation(method,description,name,"Python"))
        java_dict = parse_java_docs()
        for key in java_dict:
            name = key
            document = java_dict[key]
            for method in document:
                description = document[method]
                actions.append(self.format_documentation(method,description,name,"Java"))

        return helpers.bulk(self.es, actions,
                            stats_only=True)  # perform bulk insert Input: array of nested dictionaries
def rank(embeddings_dict, list_of_texts, annotated_tokens, classifier):
    if(annotated_tokens is None or len(annotated_tokens) == 0):
        #Cannot rank None
        return None
    words_not_found_in_text = []
    for index, text in enumerate(list_of_texts):
        #There are multiple texts in the order of increasing precision 
        # text = text_preprocessors.text_to_lower(text)
        annotated_tokens = text_preprocessors.list_to_lower(annotated_tokens)
        tokenized_text = text_preprocessors.preprocess_text(text)
        # print tokenized_text
        # print annotated_tokens
        # tokenized_text = text_preprocessors.tokenize(text)
        # tokenized_text = text_preprocessors.tokens_remove_non_alpha(tokenized_text)
        feature_vectors_dict = classification.get_feature_vectors(tokenized_text, annotated_tokens, embeddings_dict)
        if(len(feature_vectors_dict['words_not_found_in_text']) > 0):
            if((index+1) != len(list_of_texts)):
                #A word was not found and there are remaining texts to be seen
                continue
            else:
                #This is the last text. Have to do with it
                words_not_found_in_text = feature_vectors_dict['words_not_found_in_text']

        predictions = classification.classify(feature_vectors_dict['words'], feature_vectors_dict['feature_vectors'], classifier)
    # print predictions
    if(predictions is None):
        print('no predictions. Returning original list...')
        return annotated_tokens

    ranked_list = _rank_using_predictions(feature_vectors_dict['words'], predictions)
    # print ranked_list
    #Putting the words not found at the end
    ranked_list = ranked_list + words_not_found_in_text
    return ranked_list
Example #11
def loop_classify(args,
                  train_frac,
                  test_frac=None,
                  learning_rates=LEARNING_RATES,
                  tests=TESTS):

    name = f"{get_name(args)}_train_{train_frac}_test_{test_frac}"

    with open("./res/{}{}.txt".format(name, get_time()), "w") as file:
        file.write(
            f"training set fraction is {train_frac}, test set fraction is {test_frac}"
        )
        ress = {}
        for model in models_full:
            args.model = model
            print('model - {}'.format(model))
            file.write('\n\nModel: {}\n'.format(model))
            res = np.zeros((len(learning_rates), tests))
            for t in range(tests):
                embeddings, labels = embed_and_load(args)
                for i in range(len(learning_rates)):
                    lr = learning_rates[i]
                    train, train_labels, test, test_labels = select(
                        embeddings, labels, train_frac, test_frac)
                    res[i, t] = classify(train, train_labels, test,
                                         test_labels, args, iterations, lr)

            ress[model] = res

            exps = np.arange(tests)
            save_model(file, learning_rates, res)

    plot_results_vertical(learning_rates, ress, tests, name)
Example #12
    def get_true_sim(self, i, j, true_result):
        assert (true_result.dist_or_sim() == 'dist')
        _, d = true_result.dist_sim(i, j, FLAGS.dist_norm)
        c = classify(d, FLAGS.thresh_val_test_pos, FLAGS.thresh_val_test_neg)
        if c != 0:
            return c
        else:
            return None
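Here classify() maps a distance to a discrete label using two thresholds, and get_true_sim treats a result of 0 as "no label". A minimal sketch of such a thresholding helper, assuming thresh_val_test_pos is the smaller threshold; this is not the project's actual implementation.

# Hypothetical thresholding classifier consistent with the call above:
# small distances count as positive pairs, large distances as negative,
# and the band in between is left undecided (0).
def classify(d, thresh_pos, thresh_neg):
    if d <= thresh_pos:
        return 1
    if d >= thresh_neg:
        return -1
    return 0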
Example #13
def semi_supervised_learning(train_file, unlabeled_file, eval_file,
                             num_indicators, tfidf):
    seed_tweets = read_in_data(train_file)
    unlabeled_tweets = [t[0] for t in read_in_data(unlabeled_file)]

    #defaults for optional parameters
    nl = DistantLabels(seed_tweets, num_indicators=num_indicators, tfidf=tfidf)
    distant_tweets = []
    for t in unlabeled_tweets:
        prediction = nl.predict_distant_label(t)
        if prediction: distant_tweets.append(prediction)
    write_out_data(distant_tweets, eval_file)

    #testing
    aggress_count = len([a for a in distant_tweets if a[1] == 'aggress'])
    loss_count = len([a for a in distant_tweets if a[1] == 'loss'])
    other_count = len([a for a in distant_tweets if a[1] == 'other'])

    print('num distant aggress = ' + str(aggress_count))
    print('num distant loss = ' + str(loss_count))
    print('num distant other = ' + str(other_count))
    print('num distant total = ' + str(aggress_count + loss_count + other_count))
    print('num before distant labeling = ' + str(len(unlabeled_tweets)))
    #\end testing

    num_updated = 1
    while num_updated > 0:
        num_updated = 0
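        # model, label, feats, pos_tagger, C and loss, as well as verified_file, are
        # module-level settings in the original script (compare drive() in Example #14);
        # they are not defined in this excerpt.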
        results = classify(train_file,
                           eval_file,
                           model,
                           label,
                           feats=feats,
                           pos_tagger=pos_tagger,
                           C=C,
                           svm_loss=loss)
        eval_tweets = read_in_data(eval_file)
        verified_tweets = read_in_data(train_file)
        predictions = results[-1]

        for i in range(0, len(predictions)):
            if predictions[i] == 1 and eval_tweets[i][1] == label:
                verified_tweets.append(eval_tweets[i])
                eval_tweets[i] = ''
                num_updated += 1
            elif predictions[i] == 0 and eval_tweets[i][1] != label:
                verified_tweets.append(eval_tweets[i])
                eval_tweets[i] = ''
                num_updated += 1
        eval_tweets = [e for e in eval_tweets if e != '']

        write_out_data(eval_tweets, eval_file)
        write_out_data(verified_tweets, verified_file)
        train_file = verified_file

    return train_file
Example #14
def drive():
    #    train_file = 'data/classification/_train/train_full.csv'
    #    dev_file = 'data/classification/_dev/dev_full.csv'

    #    train_file = "nov-new-dataset/train.csv"
    #    train_file = 'train.csv'
    train_file = 'distant_train.csv'

    #    dev_file = "nov-new-dataset/dev.csv"
    #    dev_file = 'dev.csv'
    #    dev_file = 'add.csv'
    #dev_file = 'data/preprocessed/arrogant_bubba.csv'
    dev_file = "nov-new-dataset/test.csv"

    model = 'svm'
    label = 'aggress'
    feats = [1, 1, 'n', 0, 'min_max/all', 1300]
    C = 0.3  # original: C=0.3, modified for experimentation purposes on
    # distantly labeled dataset
    loss = 'squared_hinge'

    print('Training on ' + train_file + ', testing on ' + dev_file)

    pos_tagger = None
    if feats[2] == 'u' or feats[2] == 'b':
        pos_tagger = train_tagger()

    results = classify(train_file,
                       dev_file,
                       model,
                       label,
                       feats=feats,
                       pos_tagger=pos_tagger,
                       C=C,
                       svm_loss=loss)

    #output results
    print()
    print('Results')
    print()
    print('sought precision: ' + str(results[0]))
    print('sought recall: ' + str(results[1]))
    print('sought f-score: ' + str(results[2]))
    print()
    print('nsought precision: ' + str(results[3]))
    print('nsought recall: ' + str(results[4]))
    print('nsought f-score: ' + str(results[5]))
    print()
    print('sought precision: ' + str(results[6]))
    print('sought recall: ' + str(results[7]))
    print('sought f-score: ' + str(results[8]))

    pickle.dump(results[-1], open('predictions.txt', 'wb'))

    return results
Example #15
def classify_photo():
    print(request.args.get('key', ''))
    print(request.form['name'])
    image_name = request.form['name']
    image_path = 'images/' + image_name
    image_url = request.form['url']

    #download photo from storage
    storage.child(image_name).download(image_path)
    labels = classify(image_path)

    data = {'name': image_name, 'url': image_url, 'descriptions': labels}
    return json.dumps(data)
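For context, a client-side sketch of how such an endpoint might be exercised; the route, port, query key and form values are assumptions inferred from the handler above, not part of the original example.

import requests

# Hypothetical request against the handler above; URL and field values are made up.
resp = requests.post(
    "http://localhost:5000/classify_photo",
    params={"key": "demo-key"},
    data={"name": "cat.jpg", "url": "https://example.com/cat.jpg"},
)
print(resp.json()["descriptions"])  # labels produced by classify(image_path)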
Example #16
def without_pca():
    df = pd.read_csv('../Data/Final.csv')
    del df['serial_number']
    del df['Unnamed: 0']
    del df['dt']
    del df['manufacturer']

    X = df.iloc[:, 0:-1]
    y = df.iloc[:, -1]
    clf = GaussianNB()
    clf.fit(X, y)
    print('Accuracy without PCA: ')
    print('Naive Bayes:         ', clf.score(X, y) * 100, ' %')
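    # clf_2 is presumably a logistic-regression wrapper defined elsewhere in the original module.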
    print('Logistic Regression: ', clf_2.classify(X, y) * 100, ' %')
def getBBoxes(cv2_image):
    ans = []
    bboxes = faster_rcnn.detect(cv2_image)
    for bbox in bboxes:
        cropped = cv2_image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        if classification.classify(
                Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))):
            rbbox = []
            rbbox.append([bbox[0], bbox[1]])
            rbbox.append([bbox[2], bbox[1]])
            rbbox.append([bbox[2], bbox[3]])
            rbbox.append([bbox[0], bbox[3]])
            rbbox.append(number.give_result(model, cropped))
            ans.append(rbbox)
    return ans
Example #18
def upload_image():
    name = request.args.get('name')
    session_id = request.args.get('name')
    img = request.args.get('img')
    pos = request.args.get('pos')

    img = base64.b64decode(img)

    with open(name, 'wb') as f:
        f.write(img)
    
    # TODO: process image data (this should be done first)
    result = classifier.classify(name)
    print('Classification result: ' + str(result[0]) + ' - ' + str(result[1]))

    return str(result[0])
def draw_boxes(detections, image, colors, model):
    import cv2
    from PIL import Image
    for label, confidence, bbox in detections:
        left, top, right, bottom = bbox2points(bbox)
        cropped = image[top:bottom, left:right]
        if classification.classify(
                Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))):
            cv2.rectangle(image, (left, top), (right, bottom), colors[label],
                          1)
            height = number.give_result(model, cropped)
            cv2.putText(image, "{} [{:.2f}]".format("height limit",
                                                    float(height)),
                        (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        colors[label], 2)
    return image
Example #21
def process(df_train, X_train, y_train, df_test, X_test, y_test, clf_name):
    df_complete = pd.concat([df_train, df_test])
    X_complete = pd.concat([X_train, X_test])
    y_complete = pd.concat([y_train, y_test])

    augmenter = RelevantFeatureAugmenter(column_id='id')
    augmenter.timeseries_container = df_complete
    augmenter.fit(X_complete, y_complete)

    augmenter.timeseries_container = df_train
    X_train = augmenter.fit_transform(X_train, y_train)

    augmenter.timeseries_container = df_test
    transformed_X_test = augmenter.transform(X_test)

    y_pred = classify(X_train, y_train, transformed_X_test, clf_name)
    return log_of_classification_results(y_test, y_pred)
Example #22
def loop_classify_reweightings(args,
                               train_frac,
                               test_frac,
                               reweight_value,
                               learning_rates=LEARNING_RATES,
                               seperate=False,
                               tests=TESTS):

    args.raw_ricci = False

    name = get_name(args)

    with open(
            "./res/{}_rew{}_sep{}_{}.txt".format(name,
                                                 reweight_value, seperate,
                                                 get_time()), "w") as file:

        file.write(
            f"training set fraction is {train_frac}, test set fraction is {test_frac}, reweight value is {reweight_value}"
        )

        ress = {}
        for model in models_full:
            args.model = model
            print('model - {}'.format(model))
            file.write('\n\nModel: {}\n'.format(model))
            res = np.zeros((len(learning_rates), tests))
            for t in range(tests):
                for i in range(len(learning_rates)):
                    train, train_labels, test, test_labels = embed_and_select(
                        args,
                        train_frac,
                        test_frac,
                        reweight=True,
                        seperate=True,
                        reweight_value=reweight_value)
                    lr = learning_rates[i]
                    res[i, t] = classify(train, train_labels, test,
                                         test_labels, args, iterations, lr)

            ress[model] = res

            save_model(file, learning_rates, res)

    plot_results_vertical(learning_rates, ress, tests,
                          f'{name}_rew{reweight_value}_sep{seperate}')
Example #23
def compute_score(train, test):
    """
    From each simple example_x taken in test,
    obtains a classification using train as examples for classify()
    Checks if classify finds the correct class.
    Returns the proportion of successful classifications.

    Parameters
    ----------
    train: {list} of {tuple}
    test: {list} of {tuple}

    Returns
    -------
    {float} : the proportion of successful classifications in test using train
    """
    return sum([classify(sample, train) == sample[4]
                for sample in test]) / len(test)
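As a toy illustration of the contract compute_score relies on (each example is a tuple whose element at index 4 is the class label, and classify(sample, train) predicts a label for sample using train), here is a self-contained sketch; the 1-nearest-neighbour rule and the data are invented, not the original classify().

# Toy stub and data, for illustration only.
def classify(sample, train):
    nearest = min(train, key=lambda ex: sum((a - b) ** 2
                                            for a, b in zip(ex[:4], sample[:4])))
    return nearest[4]

train = [(0, 0, 0, 0, 'a'), (9, 9, 9, 9, 'b')]
test = [(1, 0, 0, 0, 'a'), (8, 9, 9, 9, 'b'), (5, 5, 5, 5, 'a')]
print(compute_score(train, test))  # 2 of 3 classified correctly -> ~0.67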
Example #24
def run_analysis():
    with open(constants.OUTPUT_FOLDER + 'results.csv', 'w',
              newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            "obj_sizes", "grid_size", "padding", "iou", "batch_size", "resize",
            "interpolation", "loss_0", "accuracy_0", "loss_1", "accuracy_1"
        ])
        file.flush()
        for crop_params in crop_grid:
            print("Clearing current directory...")
            clear_out_folder()
            print("Completed\n")

            print("Deconstructing images...")
            images = discretize()[crop_params['size']]
            print(images)
            exit(0)  # debug early exit left in the original; remove it to run the full pipeline
            for image in images:
                print("Processing image" + image)
                crop_obj = ObjectCrop(out_folder=constants.OUTPUT_FOLDER,
                                      img_label=image)
                crop_obj.deconstruct(gh=crop_params['grid_size'][1],
                                     gw=crop_params['grid_size'][0],
                                     padding=crop_params['padding'],
                                     iou_thresh=crop_params['iou'],
                                     test_run=False)
            print("Completed\n")

            print("Running classifications...")
            for classify_params in classify_grid:
                classify_results = classify(classify_params['batch_size'],
                                            classify_params['size'],
                                            classify_params['interpolation'])
                writer.writerow([
                    crop_params['size'], crop_params['grid_size'],
                    crop_params['padding'], crop_params['iou'],
                    classify_params['batch_size'], classify_params['size'],
                    classify_params['interpolation'], classify_results[0],
                    classify_results[1], classify_results[2],
                    classify_results[3]
                ])
                file.flush()
            print("Completed")
Example #25
def main(cfg):
    try:
        glbs = GlobalParameters()
        configs = get_cfg_files(cfg)
        results = {}
        n_test_dir = ""
        total_files = len(configs)
        for i, config in enumerate(configs):
            print_message("Running config {}/{}".format(i + 1, total_files))
            set_global_parameters(config)
            print_run_details()
            n_train_dir = normalize()
            if glbs.TEST_DIR != "":
                n_test_dir = normalize(test=True)
            train, tr_labels, test, ts_labels, all_features = extract_features(
                n_train_dir, n_test_dir)
            for selection in glbs.SELECTION:
                try:
                    train, test = get_selected_features(
                        selection, train, tr_labels, test, ts_labels,
                        all_features)
                except:
                    pass
            results[glbs.FILE_NAME] = classify(train,
                                               tr_labels,
                                               test,
                                               ts_labels,
                                               all_features,
                                               model_number=i)
            results = add_results(results)
        if glbs.WORDCLOUD:
            print_message("Generating word clouds (long processes)")
            generate_word_clouds()
        write_results(divide_results(results))
        send_work_done(glbs.TRAIN_DIR)
        print_message("Done!")
        # clean_backup_files()
    except Exception as e:
        traceback.print_exc()
        send_work_done(glbs.TRAIN_DIR,
                       "",
                       error=str(e),
                       traceback=str(traceback.format_exc()))
Example #26
def upload_file():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            #flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # if the user does not select a file, the browser may also
        # submit an empty part without a filename
        if file.filename == '':
            #flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            classificationResult = classification.classify()
            return redirect(url_for('upload_file',
                                    filename=filename))
    return '''
Example #27
    def mine_to_database(self):
        current_url = "http://jobs.scotiabank.com/careers/it-jobs/job-list-1"
        db_cursor = self.__conn.cursor()

        db_cursor.execute("""CREATE TABLE IF NOT EXISTS job_table (
        Title VARCHAR (200),
        Company VARCHAR (45),
        Date_Posted DATE ,
        Location VARCHAR (45),
        URL VARCHAR (200),
        Description TEXT,
        Difficulty ENUM('Co-op/internship','Entry','Experienced','Manager'),
        Active TINYINT(1) DEFAULT '1')""")
        db_cursor.execute("update job_table SET Active = '0' WHERE Company = 'Scotia Bank'")

        while 1:
            soup = BeautifulSoup(requests.get(current_url).content)
            rows = soup.find("table", {"class": "info-table"}).find_all("tr")

            for job in rows[4:-1]:
                title = job.find("td", {"class": "jobTitle"}).find('a').text
                location = job.find("td", {"class": "location"}).text
                date = datetime.datetime.strptime(job.find("td", {"class": "custom1"}).text, '%m/%d/%Y').strftime(
                    '%Y-%m-%d')
                job_url = urljoin(self.url, job.find("td", {"class": "jobTitle"}).find('a').get("href"))
                desc = self.get_desc(job_url)
                difficulty = str(classify(desc))

                db_cursor.execute("""INSERT INTO job_table (Title, Company, Date_Posted, Location, URL, Description,Difficulty, Active)
                VALUES (%s, %s, %s, %s, %s, %s,%s, %s) ON DUPLICATE KEY UPDATE
                  Date_Posted=%s, Location=%s, Description=%s, Company='Scotia Bank', Active = %s, difficulty=%s""",
                                  (title, "Scotia Bank", date, location, job_url, desc, difficulty, '1', date, location, desc, '1', difficulty))

            # break
            next_page = soup.find("td", {"class": "pagination"}).find("a", {"class": "pagination-more"})
            if next_page != None:
                current_url = urljoin(self.url, next_page.get("href"))
            else:
                break

        self.__conn.commit()
        db_cursor.close()
Example #28
    def mine_to_database(self):
        db_cursor = self.__conn.cursor()
        current_num = 0
        id = 200000

        db_cursor.execute("""CREATE TABLE IF NOT EXISTS job_table (
        Title VARCHAR (200),
        Company VARCHAR (45),
        Date_Posted DATE ,
        Location VARCHAR (45),
        URL VARCHAR (200),
        Description TEXT,
        Difficulty ENUM('Co-op/internship','Entry','Experienced','Manager'),
        Active TINYINT(1) DEFAULT '1')""")

        db_cursor.execute("update job_table SET Active = '0' WHERE Company = 'RBC'")

        while 1:
            current_url = self.url + str(current_num) + "/?q=&sortColumn=referencedate&sortDirection=desc"
            soup = BeautifulSoup(requests.get(current_url).content)
            total_job_num = int(soup.find("span", {"class" : "paginationLabel"}).find_all("b")[1].text)
            rows = soup.find("table", {"class" : "searchResults full table table-striped table-hover"}).find("tbody").find_all("tr")
            if current_num  > total_job_num:
                break
            for job in rows:
                id += 1
                title = job.find("td",{"class":"colTitle"}).find("span").find("a").text
                location = job.find("td",{"class":"colLocation hidden-phone"}).find("span").text
                raw_date = job.find("td",{"class":"colDate hidden-phone"}).find("span").text.replace("\n","").replace("\t","")
                date = datetime.datetime.strptime(raw_date,"%b %d, %Y").strftime("%Y-%m-%d")
                job_url = urljoin(self.url,job.find("td",{"class":"colTitle"}).find("a",{"class":"jobTitle-link"}).get("href"))
                desc = self.get_desc(job_url)
                difficulty = str(classify(desc))
                db_cursor.execute("""INSERT INTO job_table (Title, Company, Date_Posted, Location, URL, Description, Difficulty, Active)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE
                  Date_Posted=%s, Location=%s, Description=%s, Active = %s, Difficulty = %s""",
                                  (title, "RBC", date, location,  job_url, desc, difficulty, '1', date, location, desc, '1',difficulty ))

            current_num += 25

        self.__conn.commit()
        db_cursor.close()
Example #29
def inference(event, context):
    try:
        # Fetch data
        data = fetch_post_data(event)
        infer_config = fetch_inference_json()
        print('post data and inference config fetched')

        # Check if token exists
        if not data['token'] in infer_config:
            print(f'Token {data["token"]} not found')
            return create_response({
                'result': 'error',
                'message': 'No such token found.'
            })

        # Make predictions
        task_config = infer_config[data['token']]
        if task_config['task_type'] == 'classification':
            model = fetch_classification_model(task_config['model_filename'])
            output = classify(model, data['input'], task_config['classes'])
        else:
            model_path, model_metadata_path = fetch_sa_data(
                task_config['model_filename'],
                task_config['metadata_filename'],
            )
            output = get_sentiment(data['input'], model_path,
                                   model_metadata_path)

        return create_response({
            'result': 'success',
            'prediction': output,
        })
    except Exception as e:
        print(repr(e))
        return create_response(
            {
                'result': 'internal_error',
                'message': repr(e),
            },
            status_code=500)
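For orientation, a sketch of the kind of POST body this handler appears to expect; the values are invented, and the token must match a key in the inference config fetched by fetch_inference_json().

# Hypothetical payload for the handler above (values are made up).
example_event_body = {
    "token": "my-model-token",       # looked up in infer_config
    "input": "the movie was great",  # passed to classify() or get_sentiment()
}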
Example #30
def selectionHalfMethod(X, y, all_features):
    glbs = GlobalParameters()
    filename = glbs.FILE_NAME
    results = {}
    nxt = (glbs.SELECTION[0][0], int(glbs.SELECTION[0][1]))
    max_last_result = 0
    bottom = (0, 0)
    top = nxt
    while top != bottom:
        max_nxt_result = 0
        print_message(nxt[0])
        print_message(nxt[1])
        glbs.FILE_NAME = glbs.FILE_NAME + str(nxt[1])
        select = select_k_best(nxt[0], int(nxt[1]))
        glbs.FEATURE_MODEL[1] = select
        results[glbs.FILE_NAME] = classify(X, y, glbs.K_FOLDS, glbs.ITERATIONS)
        for method in results[glbs.FILE_NAME].items():
            if mean(method[1]["accuracy"]) > max_nxt_result:
                max_nxt_result = mean(method[1]["accuracy"])
        results = add_results(results, glbs, nxt)
        if max_nxt_result >= max_last_result:
            top = nxt
            if bottom[1] == 0:
                nxt = (nxt[0], int(int(nxt[1]) / 2))
            if bottom[1] != 0:
                nxt = (nxt[0], int((int(nxt[1]) + bottom[1]) / 2))
            max_last_result = max_nxt_result
        elif max_nxt_result < max_last_result:
            bottom = nxt
            nxt = (nxt[0], int((top[1] + bottom[1]) / 2))
        glbs.SELECTION[0] = nxt
        if bottom[1] - top[1] == -1 and bottom == nxt:
            break
    glbs.FILE_NAME = filename
    add_results_glbs(results, glbs)
im = cv2.imread('faces/face_85.png', 0)
plt.imshow(im, cmap=plt.cm.gray)
plt.show()

# set up window parameters
window_size = 100
shift_size = 25

# scale the strokes
scale_factor = 8

# how many faces do you want to run?
num_faces = 10

for face in load_faces(n=num_faces):
    im = cv2.imread(face, 0)

    # documentation is in classification.py
    # thresh is minimum confidence
    # eyes, noses, mouths take the confident bounding boxes
    # verbose shows each window classification and accuracy
    classify(im,
             window_size=window_size,
             shift_size=shift_size,
             scale_factor=scale_factor,
             thresh=0.5,
             eyes=2,
             noses=1,
             mouths=1,
             verbose=True)
Example #33
def main():
    dataSet, listClassses = NBL.loadDataSet()
    nb = NBL.NBayes()
    nb.tran_set(dataSet, listClassses)
    print(CL.classify(nb.tf[3], nb.tf, listClassses, k))
Example #34
my_decision_tree_new = decision_tree_create(train_data,
                                            features,
                                            'safe_loans',
                                            max_depth=6,
                                            min_node_size=100,
                                            min_error_reduction=0.0)

my_decision_tree_old = decision_tree_create(train_data,
                                            features,
                                            'safe_loans',
                                            max_depth=6,
                                            min_node_size=0,
                                            min_error_reduction=-1)

validation_set[0]

print('Predicted class: %s ' % classify(my_decision_tree_new,
                                        validation_set[0]))

classify(my_decision_tree_new, validation_set[0], annotate=True)

classify(my_decision_tree_old, validation_set[0], annotate=True)

evaluate_classification_error(my_decision_tree_new, validation_set)

model_1 = decision_tree_create(train_data,
                               features,
                               'safe_loans',
                               max_depth=2,
                               min_node_size=0,
                               min_error_reduction=-1)
model_2 = decision_tree_create(train_data,
                               features,
Example #35
    tracklet_clustering.cluster(tracklets_path, videonames, INSTANCE_ST, INSTANCE_TOTAL, clusters_path, visualize=False)

    tracklet_representation.train_bovw_codebooks(tracklets_path, videonames, traintest_parts, INTERNAL_PARAMETERS['feature_types'], intermediates_path, pca_reduction=False)
    tracklet_representation.train_fv_gmms(tracklets_path, videonames, traintest_parts, INTERNAL_PARAMETERS['feature_types'], intermediates_path)

    tracklet_representation.compute_bovw_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, \
                                                     INSTANCE_ST, INSTANCE_TOTAL, \
                                                     INTERNAL_PARAMETERS['feature_types'], feats_path + 'bovwtree/', \
                                                     pca_reduction=False, treelike=True, global_repr=True, clusters_path=clusters_path)
    tracklet_representation.compute_fv_descriptors(tracklets_path, intermediates_path, videonames, traintest_parts, \
                                                   INSTANCE_ST, INSTANCE_TOTAL, \
                                                   INTERNAL_PARAMETERS['feature_types'], feats_path + 'fvtree/', \
                                                   treelike=True, global_repr=True, clusters_path=clusters_path)

    c = [0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000]

    st_time = time.time()
    results = classification.classify(feats_path + 'bovwtree/', videonames, class_labels, traintest_parts, \
                                      np.linspace(0, 1, 11), INTERNAL_PARAMETERS['feature_types'], \
                                      c=c)
    print('ATEP classification (bovwtree) took %.2f secs.' % (time.time() - st_time))
    print_results(results)

    st_time = time.time()
    results = classification.classify(feats_path + 'fvtree/', videonames, class_labels, traintest_parts, \
                                      np.linspace(0, 1, 11), INTERNAL_PARAMETERS['feature_types'], \
                                      c=c)
    print('ATEP classification (fvtree) took %.2f secs.' % (time.time() - st_time))
    print_results(results)

    quit()  # TODO: remove this for further processing
    preprocessing_steps = None
x_tr, x_te, groups_tr, _ = preprocess_data(
    x_tr,
    x_te,
    preprocessing_steps=preprocessing_steps
)


# Classification
clf = classify(
    est=est_list[est_name],
    x_tr=x_tr.values,
    y_tr=y_tr.values.ravel(),
    groups_tr=groups_tr.values,
    x_te=x_te.values,
    test_index=x_te.index,
    perform_evaluation=perform_evaluation,
    perform_cross_validation=perform_cross_validation,
    cv_params=cv_params[est_name],
    compute_submission=compute_submission,
    submission_path=(submission_folder + 'y_te_pred.csv'),
    random_state=42
)

# Feature importance
if plot_feature_importance:
    try:
        plot_avg_feature_importance(clf.feature_importances_, x_tr.columns)
    except:
        print('Feature importance not available\n')
Example #37
#!/usr/bin/env python
import os
import os.path
import sys
from haar_cascades import create_crops
from classification import classify





if len(sys.argv) == 5:
    source_dir = sys.argv[1]
    haar_model = sys.argv[2]
    caffe_model = sys.argv[3]
    deploy_file = sys.argv[4]
else:
    print(
        "usage: %s source_dir haar_model caffe_model caffe_deploy_file" %
        __file__)
    sys.exit(1)

source_paths = []
for image in os.listdir(source_dir):
    source_paths.append(os.path.join(source_dir, image))

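# crop_path (the output directory for the cropped detections) is assumed to be
# defined elsewhere in the original script; it is not set in this excerpt.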
create_crops(haar_model, source_paths, crop_path)

classifications = classify(caffe_model, deploy_file, crop_path)
crop_file = "/home/user/FinalAssignment/Downloads/DownloadFile_"+map_type+".tif"

#Download the image
try:
    select_image(url, key, point, map_type, crop_file)
except:
    print("ERROR: No map available")

#Cut to size
cut_size(crop_file, 1500, 1000, 1500, 2000, in_file)

#Creating model to classify trees
create_model(in_file, statistics_file, training_poly, output_model, confusion_matrix)

#Apply model
classify(output_model, in_file, statistics_file, output_map)

#Delete all none trees from dataset
select_trees(output_map, selection_map)

#Calculate percentage green per quadrant (format: nw, sw, ne, se)
print(greencalculator(output_map))

##Apply model to other map

#Files
output_map1 = "/home/user/FinalAssignment/output/ClassifiedImage1.tif"
selection_map1 = "/home/user/FinalAssignment/output/ClassifiedImageTrees1.tif"
in_file1 = "/home/user/FinalAssignment/output/InputMap1.tif"

#Coordinates of point
    if 'atep-bovw' in args.methods:
        tracklet_representation.train_bovw_codebooks(tracklets_path, videonames, traintest_parts, xml_config['features_list'], intermediates_path, pca_reduction=True, nt=args.nt, verbose=args.verbose)
        tracklet_representation.compute_bovw_descriptors_multithread(tracklets_path, intermediates_path, videonames, traintest_parts, xml_config['features_list'], \
                                                                     feats_path + '/bovwtree/', \
                                                                     treelike=True, pca_reduction=True, clusters_path=clusters_path, nt=args.nt, verbose=args.verbose)

        atep_bovw = kernels.compute_ATEP_kernels(feats_path + '/bovwtree/', videonames, traintest_parts, xml_config['features_list'], \
                                                 kernels_path + '/atep-bovw/', kernel_type='intersection', norm='l1', power_norm=False, \
                                                 use_disk=False, nt=args.nt, verbose=args.verbose)

        params = [[[1]], [1], np.linspace(0,1,21), desc_weights_gbl]
        results = classification.classify(atep_bovw, \
                                          class_labels, traintest_parts, params, \
                                          xml_config['features_list'], \
                                          C=C_gbl,
                                          strategy=strategy_gbl,
                                          opt_criterion=opt_criterion,
                                          verbose=args.verbose)
        classification.print_results(results)

    if 'atep-fv' in args.methods:
        tracklet_representation.train_fv_gmms(tracklets_path, videonames, traintest_parts, xml_config['features_list'], intermediates_path, pca_reduction=True, nt=args.nt, verbose=args.verbose)
        tracklet_representation.compute_fv_descriptors_multithread(tracklets_path, intermediates_path, videonames, traintest_parts, xml_config['features_list'], \
                                                                   feats_path + '/fvtree/', \
                                                                   treelike=True, pca_reduction=True, clusters_path=clusters_path, nt=args.nt, verbose=args.verbose)

        atep_fv = kernels.compute_ATEP_kernels(feats_path + '/fvtree/', videonames, traintest_parts, xml_config['features_list'], \
                                               kernels_path + '/atep-fv/', use_disk=False, nt=args.nt, verbose=args.verbose)

        params = [[[1]], [1], np.linspace(0,1,21), desc_weights_gbl]
Example #40
from classification import classify
import pylab as pl
from sklearn.metrics import classification_report
from sklearn.metrics import auc
import time
import pprint

names = ["svm", "adaboost", "random_forest", "decision_tree", "MultinomialNB"]
for algorithm in names:
    for use_nlp in [False]:
        for use_tfidf in [False]:
            if not use_nlp and use_tfidf:
                continue
            for n_gram in [1, 2, 3]:
                if not use_nlp and n_gram > 1:
                    continue
                pprint.pprint(algorithm)
                pprint.pprint("use_nlp = " + str(use_nlp))
                pprint.pprint("use_tfidf = " + str(use_tfidf))
                pprint.pprint("n_gram = " + str(n_gram))

                t = time.time()
                test_true, test_predict = classify(name=algorithm,
                                                   pr=True,
                                                   use_CV=True,
                                                   use_nlp=use_nlp,
                                                   use_tfidf=use_tfidf,
                                                   n_grams=n_gram,
                                                   combine_numerical_nlp=False)
                run_time = time.time() - t
                print(classification_report(test_true, test_predict))
                print("Run time: " + str(run_time))