Example #1
def cross_validation():
    print('Starting cross validation')
    dr_training_data_arr = get_array(
        output_dimensional_reduction_training_data)
    logger.method_timer('Getting array from DR_training_data.csv')

    dr_training_data_arr_no_app_id = remove_first_column(dr_training_data_arr)
    logger.method_timer('Getting the tf-idf values for training')

    training_labels_arr = get_array(input_training_labels)
    training_labels_arr_no_app_id = remove_first_column(training_labels_arr)

    merged_dr_training_data_labels = merge_arrs(
        training_labels_arr_no_app_id, dr_training_data_arr_no_app_id)
    logger.method_timer('Merging the input and output arrays')

    Train_IO_Pair, Test_IO_Pair = get_cross_validation_input_output_pairs(
        merged_dr_training_data_labels, training_set_size)
    logger.method_timer(
        'Splitting training data into test and training set')

    X_Train = remove_first_column(Train_IO_Pair)
    X_Test = remove_first_column(Test_IO_Pair)

    Y_Train = get_arr_column(Train_IO_Pair, 0)
    Y_Test = get_arr_column(Test_IO_Pair, 0)

    # .ravel() because Y_Train is expected to be a 1D array
    # classifier.scikit_classify(X_Train, Y_Train.ravel(), X_Test, Y_Test)
    classifier.classify(X_Train, Y_Train.ravel(), X_Test, Y_Test)
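The array helpers used above (get_array, remove_first_column, merge_arrs, get_arr_column) are not shown in this example; a minimal sketch of what they might look like, assuming the CSV files are loaded into 2-D numpy arrays (names and behaviour here are assumptions, not the original implementation):

import numpy as np

def get_array(csv_path):
    # Load a CSV file into a 2-D float array (assumed format).
    return np.loadtxt(csv_path, delimiter=',')

def remove_first_column(arr):
    # Drop the first column (e.g. the app-id column).
    return arr[:, 1:]

def get_arr_column(arr, index):
    # Return a single column, kept 2-D so it can be stacked later.
    return arr[:, index:index + 1]

def merge_arrs(left, right):
    # Concatenate two arrays column-wise (labels first, then features).
    return np.hstack((left, right))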
Example #2
def main():
    data_dir = sys.argv[1]
    filenames = os.listdir(data_dir)

    rusentilex_path = sys.argv[2]
    sentiment_dict = {}
    build_sentiment_dict(sentiment_dict, rusentilex_path)

    datasets_banks = []
    datasets_tkk = []
    for fn in filenames:
        if re.fullmatch('bank.+(train|etalon).+', fn):
            dataset = build_dataset(os.path.join(data_dir, fn), sentiment_dict)
            datasets_banks.append(dataset)
        elif re.fullmatch('tkk.+(train|etalon).+', fn):
            dataset = build_dataset(os.path.join(data_dir, fn), sentiment_dict)
            datasets_tkk.append(dataset)
    datasets_banks_full = pd.concat(datasets_banks)
    datasets_tkk_full = pd.concat(datasets_tkk)

    X = datasets_banks_full.iloc[:, :-1]
    y = list(datasets_banks_full.iloc[:, -1])
    y_pred = classify(X)

    print('Banks')
    print('F1_micro =', f1_score(y, y_pred, average='micro'))
    print('F1_macro =', f1_score(y, y_pred, average='macro'), '\n')

    X = datasets_tkk_full.iloc[:, :-1]
    y = list(datasets_tkk_full.iloc[:, -1])
    y_pred = classify(X)

    print('TKK')
    print('F1_micro =', f1_score(y, y_pred, average='micro'))
    print('F1_macro =', f1_score(y, y_pred, average='macro'))
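The two averages printed above differ in how per-class errors are pooled: micro-F1 counts every decision equally across classes, while macro-F1 averages the per-class F1 scores. A small illustration with made-up labels:

from sklearn.metrics import f1_score

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 1, 1, 1, 2, 0]

# micro: precision/recall are computed over all classes pooled together
print('F1_micro =', f1_score(y_true, y_pred, average='micro'))
# macro: F1 is computed per class, then averaged without class weighting
print('F1_macro =', f1_score(y_true, y_pred, average='macro'))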
Example #3
def main(args=sys.argv[1:]):
    parser = argparse.ArgumentParser(description='')  # TODO: add description
    parser.add_argument('sequence', type=str, help='sequence in plaintext')
    parser.add_argument('--nucleic_acid',
                        type=str,
                        help='nucleic acid: either DNA or RNA',
                        choices=['DNA', 'RNA', 'dna', 'rna'])
    parser.add_argument(
        '--classifier',
        type=str,
        help='classifier: SVC, kNN, QDA or LR',
        choices=['SVC', 'kNN', 'QDA', 'LR', 'svc', 'knn', 'qda', 'lr'])
    parser.add_argument('--probas', '-p', dest='probas', action='store_true')
    parsed_args = parser.parse_args(args)
    classifier_name = parsed_args.classifier.lower()
    nucleic_acid = parsed_args.nucleic_acid.lower()
    feature_indices = constants.feature_indices[parsed_args.classifier.lower()]
    # if not (parsed_args.classifier.lower() == 'svc' or parsed_args.classifier.lower() == 'knn' or
    #                 parsed_args.classifier.lower() == 'qda'):
    #     raise ValueError("Classifier should be SVC, kNN or QDA")
    # if not (parsed_args.nucleic_acid.lower() == 'dna' or parsed_args.nucleic_acid.lower() == 'rna'):
    #     raise ValueError("Nucleic acid tye should be either DNA or RNA")
    sequence = read_sequence.read_sequence(parsed_args.sequence)
    scaler = joblib.load(constants.scaler_path)
    classifier = joblib.load(constants.classifier_paths[classifier_name])
    seq_features = seq_to_features(sequence, nucleic_acid)
    print classify(seq_features, scaler, classifier, feature_indices,
                   parsed_args.probas)
Example #4
def classify_digits(region, digits=None, reverse=False):
    """Classifies all 3 digits and an arrow in a digit region"""
    region = crop_height(region)  # Shrink the region
    # Sum everything in columns
    col_sum = np.sum(region, axis=0)

    if digits is None:
        digits = region.copy()

    # Section each digit based on the black columns, each start and stop is a digit.
    start1, stop1 = number_block(col_sum, 0, reverse)
    start2, stop2 = number_block(col_sum, stop1 + 1, reverse)
    start3, stop3 = number_block(col_sum, stop2 + 1, reverse)
    start4, stop4 = number_block(col_sum, stop3 + 1, reverse)

    #Extract the digits from the image. Num4 is the arrow
    num1 = digits[:, start1:stop1]
    num2 = digits[:, start2:stop2]
    num3 = digits[:, start3:stop3]
    arrow = digits[:, start4:stop4]

    # Crop the digits down vertically
    num1 = crop_height(num1)
    num2 = crop_height(num2)
    num3 = crop_height(num3)
    arrow = crop_height(arrow)
    # Return the classification ID's for each of the regions.
    return cl.classify(num1)[0], cl.classify(num2)[0], cl.classify(
        num3)[0], cl.classify(arrow)[0]
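The number_block helper is not shown; it appears to locate the next run of non-black columns in the column-sum profile. A minimal sketch under that assumption (the reverse flag of the original is not modelled here):

def number_block(col_sum, start, reverse=False, threshold=0):
    # Hypothetical helper: find the next contiguous run of columns whose
    # summed intensity exceeds `threshold`, searching from `start`.
    i = start
    while i < len(col_sum) and col_sum[i] <= threshold:
        i += 1  # skip the black gap before the digit
    block_start = i
    while i < len(col_sum) and col_sum[i] > threshold:
        i += 1  # consume the digit's columns
    return block_start, i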
Example #5
def main(directory, solr, doiRecords, to_classify, doi):
    start_time = time.time()

    if doi is None and doiRecords is None:
        print "Please either pass in doiRecords or single doi"
        sys.exit(1)

    directory = directory + "/" if directory[-1] != "/" else directory
    if not os.path.isdir(directory):
        print directory + " is not a directory, creating it and continuing..."
        os.makedirs(directory)

    tools_dir = directory + "tools/"
    non_tools_dir = directory + "non_tools/"
    tools_xml_dir = directory + "tools_xml/"
    tools_txt_dir = directory + "tools_txt/"
    output_file = directory + "output.json"

    # Classify
    if to_classify != 0:
        classify(directory, tools_dir, non_tools_dir)
    else:
        tools_dir = directory

    # Grobid extraction
    grobid_extraction(tools_dir, tools_xml_dir, tools_txt_dir)

    # Parse grobid data into output file
    parse_extracts(tools_xml_dir, tools_txt_dir, doiRecords, output_file, doi)

    # Push to Solr if needed
    if solr is not None and solr == 1:
        pushToSolr(output_file)

    print (" Total time taken:    --- %s seconds ---" % (time.time() - start_time))
Example #6
def home():
    algorithms = {
        'Neural Network': '92.26 %',
        'Support Vector Classifier': '89 %'
    }
    result, accuracy, name, sdk, size = '', '', '', '', ''
    if request.method == "POST":
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # if user does not select file, browser also
        # submit an empty part without filename
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and file.filename.endswith('.apk'):
            filename = secure_filename(file.filename)
            print(filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            if request.form['algorithm'] == 'Neural Network':
                accuracy = algorithms['Neural Network']
                result, name, sdk, size = classifier.classify(
                    os.path.join(app.config['UPLOAD_FOLDER'], filename), 0)
            elif request.form['algorithm'] == 'Support Vector Classifier':
                accuracy = algorithms['Support Vector Classifier']
                result, name, sdk, size = classifier.classify(
                    os.path.join(app.config['UPLOAD_FOLDER'], filename), 1)
    return render_template("index.html",
                           result=result,
                           algorithms=algorithms.keys(),
                           accuracy=accuracy,
                           name=name,
                           sdk=sdk,
                           size=size)
Example #7
def whole_process(imagename):
	image_in = clipping_image(imagename) # returns the path of a .txt file containing the paths of the cropped images
	
	# read path
	with open(image_in,'r') as f:
		im_path = f.read()
        #print im_path
	classifier.classify(im_path) # print all the predictions for each image under im_path
Example #8
def main(argv):
    if len(argv) == 0:
        show_help()
        sys.exit()

    # Parse parameters.
    training_dir = ""
    output_dir = ""
    query_path = ""
    training_db = ""
    training_mode = False
    classify_mode = False
    classify_mode_alg = classifier.CLASSIFIER_ALG_BF
    results_prefix = ""
    validate = False
    try:
        opts, args = getopt.getopt(argv, "t:vo:c:a:d:r:", [
            "training=", "validate=", "output=", "classify=", "algorithm=",
            "data=", "results="
        ])
    except getopt.GetoptError:
        show_help()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-t", "--training"):
            training_dir = arg
            training_mode = True
        elif opt in ("-o", "--output"):
            output_dir = arg
        elif opt in ("-c", "--classify"):
            query_path = arg
            classify_mode = True
        elif opt in ("-a", "--algorithm"):
            if arg == "bf":
                classify_mode_alg = classifier.CLASSIFIER_ALG_BF
            elif arg == "hist":
                classify_mode_alg = classifier.CLASSIFIER_ALG_HIST
            else:
                print "Illegal value for -a/--algorithm: " + arg
                sys.exit(3)
        elif opt in ("-d", "--data"):
            training_db = arg
        elif opt in ("-r", "--results"):
            results_prefix = arg
        elif opt in ("-v", "--validate"):
            validate = True

    if not classify_mode and not training_mode:
        show_help()
        sys.exit(1)

    if classify_mode:
        classifier.classify(query_path, training_db, output_dir,
                            results_prefix, classify_mode_alg)
    elif training_mode:
        trainer.train_classifier(training_dir, output_dir, validate)
Example #9
def main():
    '''
    Entry point of Postnovo app.
    '''

    start_time = time()

    test_argv = None

    userargs.setup(test_argv)

    if config.globals['Retrain']:
        classifier.train_models()
    else:
        input.parse()

        single_alg_prediction_df = singlealg.do_single_alg_procedure()
        ##REMOVE: for debugging
        #utils.save_pkl_objects(
        #    config.globals['Output Directory'],
        #    **{'single_alg_prediction_df.pkl': single_alg_prediction_df})
        ##REMOVE: for debugging
        #single_alg_prediction_df = utils.load_pkl_objects(
        #    config.globals['Output Directory'],
        #    'single_alg_prediction_df.pkl')

        consensus_prediction_df = consensus.do_consensus_procedure()
        ##REMOVE: for debugging
        #utils.save_pkl_objects(
        #    config.globals['Output Directory'],
        #    **{'consensus_prediction_df.pkl': consensus_prediction_df})
        ##REMOVE: for debugging
        #consensus_prediction_df = utils.load_pkl_objects(
        #    config.globals['Output Directory'],
        #    'consensus_prediction_df.pkl')

        prediction_df = pd.concat([
            single_alg_prediction_df.reset_index(),
            consensus_prediction_df.reset_index()
        ],
                                  ignore_index=True)
        if 'index' in prediction_df.columns:
            prediction_df.drop('index', axis=1, inplace=True)

        prediction_df = masstol.do_mass_tol_procedure(prediction_df)

        prediction_df = interspec.do_interspec_procedure(prediction_df)

        classifier.classify(prediction_df)
        ##REMOVE: for debugging
        #classifier.classify(None)

    print('Postnovo successfully completed')
    utils.verbose_print('Total time elapsed:', time() - start_time)

    return
Example #10
def fetchProfiles(initURL, maxcount):
    """Given the URL from where to initiate the crawling, it first fetches the webpage, sends it to
    the crawler for scraping data from the webpage. Not only that, it also reads all the public profile
    urls present in the current page and adds them to the list. In subsequent iterations, it will fetch
    the LinkedIn profiles of people associated with these urls. The iteration continues for the number of
    times specified by maxcount"""
    count = 0
    links = set([initURL])
    waitinglist = list()

    start = datetime.now()

    while count < maxcount:
        count += 1

        while len(links) > 0:
            newreq = links.pop()
            if newreq not in waitinglist:  # If the url hasn't been used already, add it to the waiting list
                waitinglist.append(newreq)
                break

        try:
            page = urllib2.urlopen(waitinglist[-1]).read(
            )  # Fetch the web page from the url just appended
            scraper.scrape(
                page,
                waitinglist[-1])  # Send the page and the url for scraping

            if len(links) < 3:
                links.update(profileURL.findall(
                    page))  # Get all the urls present in this web page
        except:
            pass

        links = set([link.strip('"')
                     for link in links])  # String processing to remove quotes

        percentage = int(count * 100.0 / maxcount)  # Progress bar
        sys.stdout.write('\r' + '=' * percentage + '>' + ' ' *
                         (101 - percentage) + str(percentage) + '%')
        sys.stdout.flush()

    print 'Fetched', count, 'profiles in', \
     (datetime.now() - start).total_seconds(), 'seconds'

    start = datetime.now()
    classifier.classify(
    )  # Classify all profiles in the database [TODO: classify only updated portion of db]
    print 'Classified all profiles in database in', \
     (datetime.now() - start).total_seconds(), 'seconds'

    indexer.computeIndexes(
    )  # Compute indexes for every profile in the database [TODO: same as above]
    print 'Calculated indexes for all profiles in database in', \
     (datetime.now() - start).total_seconds(), 'seconds'
Example #11
def main(train, classify, help):
    if (help):
        print(help_message)
        sys.exit(0)
    else:
        if (train):
            iteration = click.prompt('Iteration count for training model', type=int)
            trainer.train(num_iteration=iteration)
        else:
            image_file_path = click.prompt('Image file path that is going to be classified', type=str)
            classifier.classify(file_path=image_file_path)
Example #12
def main(filename):
    print('Processing image...')
    page = preprocess(filename) # read in, clean image
    bboxes = get_bbox(page)  # get expanded bounding boxes around characters
    bboxes = merge(bboxes)  # merge expanded bounding boxes
    bboxes = sort_bbox(bboxes)  # sort characters to order that you'd read them in
    characters = resize(page, bboxes)  # resize to 28x28, centered in 20x20

    # Make each image take a single row in the big batch image by flattening the
    # width (2nd) and height (3rd) dimension.
    characters = np.reshape(characters, (len(characters), -1))

    print('Classifying characters...')
    # Return classified images as a string of ASCII characters.
    predictions = classify(characters)
    letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    text = []
    for pred in predictions:
        text.append(letters[int(pred)])
    text = ''.join(text)
    print('Writing to file...')
    # Write out to file
    base_filename = filename.split('.')[0]
    f = open('{}.txt'.format(base_filename), 'w+')
    f.write(text)
    f.close()
Example #13
def main():
	# Downloads, extracts data from and classifies images
	count = 0

	if scanner.is_connected():
		count = len(scanner.get_image_list())

	while scanner.is_connected():
		new_count = len(scanner.get_image_list())
		while new_count > count:
			count = count + 2
			print('📸 NEW IMAGE DETECTED')

			latest_image = scanner.download_image(count)
			print('⬇️ NEW IMAGE TRANSFERRED')

			data = finder.extract_data_from(latest_image, 0)
			print('👀 DATA EXTRACTED')

			if len(data) > 0:
				print('FINDER OUTPUT', data)

				result = classifier.classify(data[0])
				print('✅ CLASSIFICATION COMPLETE')
				save(result)
			else:
				print('❌ NO EYES FOUND')
Example #14
def train(config):
    print("\nSetting up net to train\n")
    #net variables
    training_src = config.train_dir
    batch_size = config.batchsize
    num_epochs = config.num_epochs
    image_size = config.image_size

    #create model
    nnmodel = createModel()
    print("Model created")
    #get training and input data
    classifications = classifier.classify(config.save_dir, "final_graph.pb",
                                          training_src)
    input_training, output_training = batch_create(training_src, image_size)
    nnmodel.fit(x=input_training,
                y=output_training,
                batch_size=batch_size,
                epochs=num_epochs,
                verbose=1)
    nnmodel.evaluate(input_training, output_training, batch_size=batch_size)
    print("Training finished")

    # save model
    save_model(config, nnmodel)
Example #15
def regulate_spreadsheet():

    if request.method != 'POST':
        # abort(400, "Faça requisição via POST")
        return show_form()

    if "spreadsheet" in request.files:
        f = request.files['spreadsheet']

        data_df = pd.read_excel(f)

        texts = data_df["QUADROCLINICO"].values

        t = get_treshould()
        preds = classify(texts, t)

        save_copy(texts, preds, t)

        data_df["_APROVADO"] = preds["pred_bin"]
        data_df["_PROBA_0"] = [x[0] for x in preds["pred_probas"]]
        data_df["_PROBA_1"] = [x[1] for x in preds["pred_probas"]]

        tf = tempfile.NamedTemporaryFile(suffix=".xlsx")
        data_df.to_excel(tf.name, index=False)

        attachment_filename = "regulada_%s" % f.filename
        return send_file(tf.name,
                         as_attachment=True,
                         attachment_filename=attachment_filename)
Example #16
def main():
    part2xy = load_dataset_fast('FILIMDB')
    train_ids, train_texts, train_labels = part2xy['train']

    print('\nTraining classifier on %d examples from train set ...' %
          len(train_texts))
    st = time()
    params = train(train_texts, train_labels)
    print('Classifier trained in %.2fs' % (time() - st))

    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))

        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)

    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Example #17
def receive_message():
    if request.method == 'GET':
        print("GET")
        print("")
        """Before allowing people to message your bot, Facebook has implemented a verify token
        that confirms all requests that your bot receives came from Facebook.""" 
        token_sent = request.args.get("hub.verify_token")
        print(token_sent)
        return verify_fb_token(token_sent)
    #if the request was not get, it must be POST and we can just proceed with sending a message back to user
    else:
        # get whatever message a user sent the bot
        print("POST")
        print("")
    
        output = request.get_json()
        for event in output['entry']:
            messaging = event['messaging']
            for message in messaging:
                if message.get('message'):
                    #Facebook Messenger ID for user so we know where to send response back to
                    recipient_id = message['sender']['id']
                    if message['message'].get('text'):
                        response_sent_text = get_message()
                        send_message(recipient_id, response_sent_text)
                    #if user sends us a GIF, photo,video, or any other non-text item
                    if message['message'].get('attachments'):
                        #TODO: Handle non image messages
                        output=classifier.classify(message['message'].get('attachments')[0]['payload']['url'])
                        #classifier.classify([message['message'].get('attachments')['payload']])
                        #response_sent_nontext = get_message()
                        #send_message(recipient_id, response_sent_nontext)
                        send_message(recipient_id, output)
    return "Message Processed"
Example #18
def butterflies():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)
            filename = 'http://146.169.45.105/uploads/' + filename
            s = classifier.classify(SVM_PATH,VOCAB_PATH, filepath)
            res = s.split(',')
            try:
                page = wikipedia.page(res[0])
                url = page.url
                wiki = page.content
            except wikipedia.exceptions.PageError:
                page = ""
                url = ""
                wiki = ""
            #wiki = page.html()
            try:
                index = wiki.index('\n\n\n== See')
                wiki = wiki[:index]
                wiki = wiki.replace('\n\n\n==','<h2>')
                wiki = wiki.replace('==\n','</h2>')
            except ValueError:
                wiki = wiki.replace('\n\n\n==','<h2>')
                wiki = wiki.replace('==\n','</h2>')
            #return redirect(url_for('uploaded_file',filename=filename))
            if(len(res)>1):
                return render_template('results.html',res1=res[0],res2=res[1],filename=filename,wiki=wiki,url=url)
            else:
                return render_template('results.html',res1=res[0],res2=res[0],filename=filename,wiki=wiki,url=url)
    return render_template('butterflies.html')
Example #19
def _classify():
    """Get endpoint to classify text. Expects application/json as content type.
    Expects the json to follow structure {"text": ["sentence to class 1", "sentence 2", ...]}
    """
    # Validate content type
    if not request.content_type.startswith('application/json'):
        raise TypeError('Request content type must be of application/json')
    request_data: Dict = request.json

    # Validate required json fields
    if 'text' not in request_data:
        raise ValueError('Missing required json attribute "text" in json call')
    to_classify: List[str] = request_data['text']

    # Validate contents of text to classify
    if not isinstance(to_classify, list):
        raise TypeError('Data provided in "text" must be a list')
    if not all([isinstance(x, str) for x in to_classify]):
        raise TypeError('List of instances contains non-string inputs')

    # TODO: Explain why the classify function is supplied the MODEL and VECTORIZER global objects from lines 7 and 8 instead of using the defaults?
    results: List[str] = [
        classify(x, model=MODEL, vectorizer=VECTORIZER) for x in to_classify
    ]

    # Check that we have full results for each item
    if len(results) != len(to_classify):
        raise ValueError('Result length does not match request length')

    return jsonify(results)
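A usage sketch for the endpoint above, assuming it is registered as a POST route on a locally running server (the URL, port and route name are illustrative, not part of the example):

import requests

payload = {'text': ['sentence to classify', 'another sentence']}
# The endpoint validates the content type, so send proper JSON.
response = requests.post('http://localhost:5000/classify', json=payload)
print(response.json())  # expected: one predicted label per input sentence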
Example #20
def tester():

    matrix, labels = file2matrix.file2matrix('../data/abalone')

    normalized_matrix, ranges, mins = normalize.normalize(matrix)

    number_of_rows = normalized_matrix.shape[0]

    number_of_test = int(number_of_rows * 0.1)

    error_count = 0.0

    for i in range(number_of_test):

        result = classifier.classify(
            normalized_matrix[i, :],
            normalized_matrix[number_of_test:number_of_rows, :],
            labels[number_of_test:number_of_rows], 20)

        print "the classifier's prediction is %d, the real answer is %d" % (
            result, labels[i])

        if (result != labels[i]):

            error_count = error_count + 1.0

    print "the error rate is %f" % (error_count / float(number_of_test))
Example #21
def main():

    print '\n'

    if not os.path.exists(rp.target):
        os.makedirs(rp.target)

    table, positives, negatives, p_tweets, n_tweets = handler.load_table(
        "table/table.csv")
    filename = rp.target + "/tweets.csv"

    #Scrap Tweets and store them in csv file
    crawler.go_spider_go(filename,
                         rp.target,
                         retweets=rp.rt,
                         scroll_pause=float(rp.sp),
                         headless=rp.headless)

    tweets = handler.load_tweets(rp.target + "/tweets.csv")

    positive_ratio, group = classifier.classify(tweets, table, positives,
                                                negatives, p_tweets, n_tweets)

    tag = 'user' if '@' in rp.target else 'hashtag'

    print "%s is a  %s %s" % (rp.target, group.lower(), tag)
    print "Positive Ratio: %.2f\n" % positive_ratio
Example #22
    def estimate_coverage(self, img, bbox, fast_mode=False):

        self.progressBar.show()
        self.progressBar.setValue(10)
        QtGui.QApplication.processEvents()
        x0 = bbox[0]
        y0 = bbox[1]
        x1 = bbox[2]
        y1 = bbox[3]
            
        img_bboard = img[int(y0):int(y1), int(x0):int(x1), :]
            
            
        response, raw_response, exemplar_colors = \
            classify(img_bboard, THRES_RAW, fast_mode=fast_mode)
        self.progressBar.setValue(65)
        QtGui.QApplication.processEvents()
        fraction = 0.0
                      
        if response is not None:

            segments = (response > THRES_KMS)
            fraction = np.mean(segments)             
            hist = np.float32(segments).mean(1)

        else:
            segments = None
            hist = None
            
        return fraction, segments, hist, response, raw_response       
Example #23
	def calc(self):
		if self.tx == None:
			if self.fill_tx() == -1:
				return -1

		data = check_cache(self.address)

		self.label = get_label(self.address)
		self.balance = data["final_balance"]
		self.total_received = data["total_received"]
		self.total_sent = data["total_sent"]
		for t in self.tx:
			type = (reduce((lambda a,b:a or b[0]==self.address),t.inputs,False),
						reduce((lambda a,b:a or b[0]==self.address),t.outputs, False))

			for i in t.inputs:
				if i[0] == self.address:
					pass
				elif type[1]:
					self.receives_from.add(i[0])
			for i in t.outputs:
				if i[0] == self.address:
					pass
				elif type[0]:
					self.sends_to.add(i[0])
		
		self.classified_as = classifier.classify(self)
Example #24
def fetchProfiles(initURL, maxcount):
    """Given the URL from where to initiate the crawling, it first fetches the webpage, sends it to
    the crawler for scraping data from the webpage. Not only that, it also reads all the public profile
    urls present in the current page and adds them to the list. In subsequent iterations, it will fetch
    the LinkedIn profiles of people associated with these urls. The iteration continues for the number of
    times specified by maxcount"""
    count = 0
    links = set([initURL])
    waitinglist = list()

    start = datetime.now()

    while count< maxcount:
        count += 1

        while len(links) > 0:
            newreq = links.pop()
            if newreq not in waitinglist:   # If the url hasn't been used already, add it to the waiting list
                waitinglist.append(newreq)
                break

        try:
            page = urllib2.urlopen(waitinglist[-1]).read() # Fetch the web page from the url just appended
            scraper.scrape(page, waitinglist[-1]) # Send the page and the url for scraping

            if len(links) < 3:
                links.update(profileURL.findall(page)) # Get all the urls present in this web page
        except:
            pass

        links = set([link.strip('"') for link in links]) # String processing to remove quotes

        percentage = int(count*100.0/maxcount)    # Progress bar
        sys.stdout.write('\r'+'='*percentage+'>'+' '*(101-percentage) +str(percentage)+'%')
        sys.stdout.flush()

    print 'Fetched', count, 'profiles in', \
     (datetime.now() - start).total_seconds(), 'seconds'

    start = datetime.now()
    classifier.classify() # Classify all profiles in the database [TODO: classify only updated portion of db]
    print 'Classified all profiles in database in', \
     (datetime.now() - start).total_seconds(), 'seconds'

    indexer.computeIndexes() # Compute indexes for every profile in the database [TODO: same as above]
    print 'Calculated indexes for all profiles in database in', \
     (datetime.now() - start).total_seconds(), 'seconds'
Example #25
def identify(url):
    result = classifier.classify(url)
    # if True:
    if result:
        return render_template('results.html', url=url, result=result['title'], locations=locator.find(query=result['alias']))
        # return render_template('results.html', url=url, result=result['title'], locations=HARD)
    else:
        return render_template('unrecognized.html', url=url)
Example #26
 def check_neg_surplus(b0):
     print("Running b0 =", b0)
     total_surplus = 0.0
     for i, features in enumerate(all_features):
         label, info = classify(features, all_templates, means, variances, deformation=deform_type, correct_label=all_labels[i], b0=b0, eta=eta0, rho=rho0, samples=samples, debug_plot=PLOT, threshold_multiple=alpha)
         total_surplus += info['surplus_change']
     print("Returning surplus", total_surplus)
     return -total_surplus
Example #27
def classify(testpath):
    label = lookup_label(testpath)
    trainingpaths = [
        os.path.join(class_dir, path) for path in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, path))
    ]
    predicted = classifier.classify(trainingpaths, testpath)
    return predicted, label
Example #28
def upload():
    if request.method == 'POST':
        f = request.files['file']
        f.save(secure_filename(f.filename))  #Save image at the server
        preprocessor.detector(f.filename)
        words = classifier.classify()
        os.remove(f.filename)
        return render_template("result.html", data=words)
Example #29
def classify():
    if bottle.request.method == 'OPTIONS':
        return {}
    try:
        img = utils.save_image(bottle.request)
        return classifier.classify(settings.UPLOADS + img.filename)
    except MissingFile:
        return {'error': 'missing image file'}
Example #30
def accuracy_one_level(testing_file, classifier_name):

    with open(testing_file) as csvfile:
            n_row = 0
            counter = 0
            reader = csv.reader(csvfile)
            false_alerts_counter = 0  # prediction is 1, 2 or 3 but the actual class is 0
            misplaced_alerts_counter = 0  # correct predictions on the non-zero classes
            missed_alerts_counter = 0  # prediction is 0 but the actual class is 1, 2 or 3
            A = 0  # all predictions different from 0
            T_1_2_3 = 0
            T_0 = 0
            # start processing
            for row in reader:
                    n_row += 1  # number of examples into the file 
                    actual_sentence = row[0]  # string of the example
                    actual_class = row[1]  # example that we already know  the class
                                                  
                    answer_class = classifier.classify(classifier_name, actual_sentence)
                    print("Actual Class: ", actual_class, " ", "Response Class: ", answer_class, "\n")
                    
                    # when the prediction is different of 0
                    if answer_class != '0':
                        A += 1
                    
                    if actual_class == '0':
                        T_0 += 1
                        
                    if actual_class == answer_class:  # asking for a hit
                        counter = counter + 1
                        # good prediction different of 0(misplaced_alert)
                        if actual_class != '0': 
                            misplaced_alerts_counter += 1  
                    # if the prediction is wrong        
                    else :
                        # when the class is 0 and the prediction is different of 0(false alert)
                        if actual_class == '0':
                            false_alerts_counter += 1
                        # when the actual class is 1,2 or 3 and it predict 0
                        if answer_class == '0':  # else is ok too i think need to see 
                            missed_alerts_counter += 1
            
            
            accuracy = (counter / n_row) * 100
            print('A : ', A)
            print('false alerts counter :' , false_alerts_counter)
            print('missed alerts counter :', missed_alerts_counter)
            print('misplaced alerts counter :', misplaced_alerts_counter)
            
            false_alerts = (false_alerts_counter / A) * 100
            misplaced_alerts = ((A - misplaced_alerts_counter) / A) * 100
            T_1_2_3 = n_row - T_0
            print('T_1_2_3 : ' , T_1_2_3)
            missed_alert = (missed_alerts_counter / T_1_2_3) * 100
            
            print("Results: ", "\n" , "Number of examples: ", n_row, "\n", "Number of hits: ", counter, '\n', "Accuracy: ", accuracy, "%", '\n', "False Alerts: ", false_alerts, "%", '\n', "Missed Alerts: ", missed_alert, "%" "misplaced Alerts: ", misplaced_alerts, "%", '\n');   
            
    return accuracy, false_alerts, misplaced_alerts, missed_alert
Example #31
def identify_sentences(disease, text):

    tagged_text = symptoms_tagger(text)
    context = nltk.sent_tokenize(tagged_text)
    content = ""
    for sent in context:
        regex1 = re.compile(r'<symptom>')
        regex2 = re.compile(r'<gene>')
        if regex1.search(sent):
            # print(sent)
            content += " " + sent

        elif regex2.search(sent):
            # print(sent)
            content += " " + sent

    # for disease, abstract in Abstracts:
    sentences = nltk.sent_tokenize(content)
    # extract_sentence = []
    ext_sent = ""
    ext_sent2 = ""
    for sent in sentences:
        line = re.sub(r'<\/?[^>]*>', '', sent)
        # print(line)
        category = classify(line)
        # print(category)
        if category:
            if category == "symptoms":
                ext_sent += " " + sent
            elif category == "gene":
                # print(sent)
                ext_sent2 += " " + sent
        else:
            # get d_s relation patterns and check with each sentences
            for pattern in ds_patterns:
                regex2 = re.compile(r'(?i)\b(%s)\b' % pattern)
                if regex2.search(line):
                    # print(sent)
                    # if relation in sent:
                    ext_sent += " " + sent
                    break

            # get d_g relation patterns and check with each sentences
            for pattern in dg_patterns:
                regex3 = re.compile(r'(?i)\b(%s)\b' % pattern)
                if regex3.search(line):
                    # if relation in sent:
                    ext_sent2 += " " + sent
                    break
    entity_set = identify_entities(disease, ext_sent, ext_sent2)
    return entity_set


#
# text = "It can make it hard to breathe, too, and can cause wheezing, fever, tiredness, and chest pain. The disease happens when the lining of the airways in your lungs gets irritated."
#
# identify_sentences("dengu", text)
Example #32
def run(image_path):
    image_path_ = image_path.rsplit('.', 1)[0]
    preprocess_image_path = image_path_ + '_bw.jpg'
    audio_path = image_path_ + '.wav'

    image = cv2.imread(image_path)

    image_bw = segmenter.preprocess(image)

    cv2.imwrite(preprocess_image_path, image_bw)

    lines = segmenter.segment_lines(image_bw)

    #classified_text = "රජගහා විහාරෆ හඤූතර තරඔක් කදූභැටියරී වර පසින් ඇහ් අ"
    classified_text = ""

    for i, line in enumerate(lines):
        character_images = segmenter.segment_line(line, i)
        for character_image in character_images:
            classified_text += classifier.classify(character_image)
            #classified_text += "2"

    #classified_text = "රජගහා විහාරෆ හඤූතර තරඔක් කදූභැටියරී වර පසින් ඇහ්"

    # remove extra spaces

    #classified_text = ""
    classified_text = classified_text.strip()
    classified_text = " ".join(classified_text.split())

    #join modifiers
    classified_text = classifier.join_modifiers(classified_text)

    print classified_text

    corrected_words = corrector.correct(classified_text)

    corrected_text = ""

    for words in corrected_words:
        corrected_text += words[0].encode("utf-8") + " "

    print corrected_text

    synthesized_data = synthesizer.synthesize(corrected_text)

    audio_outfile = wave.open(audio_path, 'wb')

    audio_outfile.setparams(synthesized_data[0][0])

    for i in range(0, len(synthesized_data), 1):
        audio_outfile.writeframes(synthesized_data[i][1])

    # classified_text = "රජගහා විහාරෆ හඤූතර තරඔක් කදූභැටියරී වර පසින් ඇහ් අ"
    # corrected_words = [[1111111,2,"old1"],[3233333,4,"old2"]]

    return classified_text, corrected_words, image_path_, audio_path
Example #33
def main(path):

    if 'q' not in request.args:
        return jsonify(status_code=400, msg="Missing parameter 'q'")

    q = request.args.get('q')
    res = classifier.classify(q)

    return jsonify(**res)
Example #34
 def classifyFace():
     print("Classifying Face")
     prediction = classify(filename,
                           "./tf/training_output/retrained_graph.pb",
                           "./tf/training_output/retrained_labels.txt",
                           shape=224)
     nonlocal text
     text = prediction[0][0]
     print("Finished classifying with text: " + text)
Example #35
def main(path):
    
    if 'q' not in request.args:
        return jsonify(status_code=400,
                       msg="Missing parameter 'q'")

    q = request.args.get('q')
    res = classifier.classify(q)

    return jsonify(**res)
Example #36
    def get_frame(self):
        success, frame = self.video.read()

        if success:
            # We are using Motion JPEG, but OpenCV defaults to capture raw images,
            # so we must encode it into JPEG in order to correctly display the
            # video stream.
            frame = classifier.classify(frame, self.face_detector, self.model)
            ret, jpeg = cv2.imencode('.jpg', frame)
            return jpeg.tobytes()
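The comment above refers to Motion JPEG: each call to get_frame() returns one JPEG-encoded frame as bytes. A common way such a method is consumed from a Flask route, sketched here under the assumption that VideoCamera is the class owning get_frame() (the class name and route are illustrative, not part of the example):

from flask import Flask, Response

app = Flask(__name__)

def gen(camera):
    # Wrap each JPEG frame as one part of a multipart MJPEG stream.
    while True:
        frame = camera.get_frame()
        if frame is None:
            break
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(gen(VideoCamera()),  # VideoCamera: assumed owner of get_frame()
                    mimetype='multipart/x-mixed-replace; boundary=frame')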
Example #37
 def test_classify(self):
     # A quick integration test to ensure that all the sub-parts are included.
     # If this test fails, a smaller unit test SHOULD fail as well.
     self.assertEqual(
         classifier.classify([
             ('pull_request', {
                 'pull_request': {
                     'state': 'open',
                     'user': {
                         'login': '******'
                     },
                     'assignees': [{
                         'login': '******'
                     }],
                     'title': 'some fix',
                     'head': {
                         'sha': 'abcdef'
                     },
                     'additions': 1,
                     'deletions': 1,
                 }
             }, 1),
             make_comment_event(
                 1,
                 'k8s-bot',
                 'failure in https://k8s-gubernator.appspot.com/build/bucket/job/123/',
                 ts=2),
             ('pull_request', {
                 'action': 'labeled',
                 'label': {
                     'name': 'release-note-none',
                     'color': 'orange'
                 },
             }, 3)
         ], {'e2e': ['failure', None, 'stuff is broken']}),
         (True, True, ['a', 'b'], {
             'author': 'a',
             'assignees': ['b'],
             'additions': 1,
             'deletions': 1,
             'attn': {
                 'a': 'fix tests',
                 'b': 'needs review#0#0'
             },
             'title': 'some fix',
             'labels': {
                 'release-note-none': 'orange'
             },
             'head': 'abcdef',
             'needs_rebase': False,
             'status': {
                 'e2e': ['failure', None, 'stuff is broken']
             },
             'xrefs': ['/bucket/job/123'],
         }))
Example #38
def predict(path):

    for dirpath, dirnames, filenames in os.walk(path):
        for inputFile in filenames:
            # avoid unwanted files. Match just files: "filename.c"
            m = re.match("[^#]*\.c$",inputFile)
            if m:
                # print("%s/%s" % (dirpath,inputFile))

                filename = "%s/%s" % (dirpath,inputFile)
                testHeader = readTestFileHeader(filename)
                # featureVector = sca.analyzeCode(filename)
                sys.stdout.write("file: %s/%s \n" % (dirpath,inputFile))
                # print(featureVector)
                # featureMatrix.append(featureVector)
                # labelVector.append(testHeader[1])
                sys.stdout.write("\t")
                print(testHeader)
        
                clf.classify(tree,testHeader[0])
Example #39
def main():
	loadData();
	data = getData();

	NBresults = list()
	SVMresults = list()
	for x in xrange(0,10):
		(train_set,test_set) = splitData(data)
		
		classifier = getClassifier(train_set,'NaiveBayes')
		NBresults.append(classify(classifier,test_set))

		classifier = getClassifier(train_set,'SVM')
		SVMresults.append(classify(classifier,test_set))
	
	print '\nNaive Bayes Classifier'
	printTable(NBresults)

	print '\nSVM'
	printTable(SVMresults)
Example #40
def main():
    loadData()
    data = getData()

    NBresults = list()
    SVMresults = list()
    for x in xrange(0, 10):
        (train_set, test_set) = splitData(data)

        classifier = getClassifier(train_set, 'NaiveBayes')
        NBresults.append(classify(classifier, test_set))

        classifier = getClassifier(train_set, 'SVM')
        SVMresults.append(classify(classifier, test_set))

    print '\nNaive Bayes Classifier'
    printTable(NBresults)

    print '\nSVM'
    printTable(SVMresults)
Example #41
def classify_input():
    global X, d, user_data_path

    # if user training data is unavailable, report an error: the user has not trained yet
    if(not os.path.exists(user_data_path + user + ".csv") or len(classifier.c_classes) <= 1):
        print("NO PROPER TRAINING DATA AVAILABLE. PLEASE TRAIN.")
        return

    # get class labels
    d = classifier.classify(X)

    print(d)
Example #42
def calculate_page_percentages(entries):
    """
    create report by events per page

    :param entries: an iterable of LogEntry
    """
    classified_log = classify(entries)

    page_occurrence_result = page_occurrence(classified_log)

    result = calculate_percentage(classified_log, page_occurrence_result)

    return result
Example #43
def get_subject(soup):
	# get_subject
	try:
		a = soup.find('div', {'class':'lockup product application'})
		subject = a.find('span', {'class':'label'}).next_sibling.get_text()
		if subject in ('Education', 'Reference'):
			subject = classifier.classify(get_description(soup))
			print "This app was classified as '%s'!" % subject
			return subject
		else:
			return "NotEducation"
	except (AttributeError, ValueError, TypeError):
		print "Error retrieving subject for %s." % name
		return ''
Example #44
def test_feature_set_performance(target_label=info.LABEL_GEN, base_fset=param.FEATURE_SET_ORIGINAL,
                                 reduced=False, fillna=True, scaling=True,
                                 feat_sel=False, feat_num=None, with_pred=False, clf_name='LR', reg_param=1.0):
    p_df = loader.load_profile_info()
    print "profile information is loaded"

    feature_set_name = base_fset
    feature_set_name += param.REDUCED_SUFFIX if reduced else ""
    feature_set_name += param.FILL_SUFFIX if fillna else ""
    feature_set_name += param.SCALING_SUFFIX if scaling else ""
    print feature_set_name

    user_feature_df = loader.read_csv_feature_set(feature_set_name)
    user_feature_df.columns = map(lambda x: int(x), user_feature_df.columns)
    # user_feature_df = loader.read_csv_feature_set(param.FEATURE_SET_EXTENSION_APP,
    #                                               fill_na=True, normalize=False)
    print "user feature data set is loaded"

    if with_pred:
        pred_df = pd.read_csv(param.DATA_PATH + "/predictions/item_preds.csv", index_col='profile_id')
        # pred_df = pd.read_csv(param.DATA_PATH + "/predictions/content_preds.csv", index_col='profile_id')
        for col in pred_df.columns:
            uls = list(pred_df[col].unique())
            uls.sort()
            pred_df.loc[:, col] = pred_df[col].apply(lambda x: uls.index(x))

        p_df = p_df.loc[pred_df.index]
        user_feature_df = user_feature_df[pred_df.index]

        pred_df.columns = [[info.APP] * len(pred_df.columns), ['itemBased_prediction'] * len(pred_df.columns),
                           list(pred_df.columns), [feat.NOMINAL_VAL] * len(pred_df.columns)]
        pred_df.columns.names = ['modality', 'field', 'feature', 'type']
        user_feature_df = pd.concat([user_feature_df.T, pred_df], axis=1).T

    # method_types = ["LR", "MI", "MI-min10", "FS", "FS-min10", "RF-100"]
    method_type = "MI" if feat_sel else None
    cv = 10
    repeat_num = 20
    nf = feat_num if feat_sel else None

    if not os.path.isdir(param.EXPERIMENT_PATH):
        os.makedirs(param.EXPERIMENT_PATH)

    print "\nlabel, fillna, scaling, feat_sel, clf_name, reg_param, k-CV, ith-fold, featNum, accuracy"

    for repeat in range(repeat_num):
        temp_score = clf.classify(user_feature_df, p_df, feature_set_name, features=None, label=target_label,
                                  reg_param=reg_param, selection=feat_sel, num_feat=nf,
                                  sel_method=method_type, cv=cv)
Example #45
def testClassifier(classifier):
  print("testing classifier with test data, please wait\n")

  maxwin = 0.0
  maxlose = 0.0
  bestwin=""
  bestlose=""

  total = 0
  right = 0
  trainData = parsedata.readAllGames(True,exclusions)
  counter = 0
  for k in iter(trainData):
    guess,prob = classifier.classify(k)
    correct = trainData[k]
    counter += 1
    for game in correct:
      if guess == (game > 0):
        right+=1
        if prob > maxwin:
          maxwin = prob
          bestwin = k
      else:
        if prob > maxlose:
          bestlose = k
          maxlose = prob
      total += 1
      con[guess][game>0] += 1
      con[game>0][guess] += 1

    if counter % 10000 == 0: 
      print("{}, ".format(counter),end='',flush=True)
    if counter % 50000 == 0:
      print()


  #printConfusion(con)
  #pprint(con)
  # printing the prototypical images

  print()
  print(round(right / total,4)*100, ' % accuracy')
Example #46
 def test_classify(self):
     # A quick integration test to ensure that all the sub-parts are included.
     # If this test fails, a smaller unit test SHOULD fail as well.
     self.assertEqual(classifier.classify([
             ('pull_request', {
                 'pull_request': {
                     'state': 'open',
                     'user': {'login': '******'},
                     'assignees': [{'login': '******'}],
                     'title': 'some fix',
                     'head': {'sha': 'abcdef'},
                     'additions': 1,
                     'deletions': 1,
                     'milestone': {'title': 'v1.10'},
                 }
             }, 1),
             make_comment_event(1, 'k8s-bot',
                 'failure in https://gubernator.k8s.io/build/bucket/job/123/', ts=2),
             ('pull_request', {
                 'action': 'labeled',
                 'label': {'name': 'release-note-none', 'color': 'orange'},
             }, 3),
             make_comment_event(2, 'k8s-merge-robot', '<!-- META={"approvers":["o"]} -->', ts=4),
         ], status_fetcher={'abcdef': {'e2e': ['failure', None, 'stuff is broken']}}.get
     ),
     (True, True, ['a', 'b', 'o'],
      {
         'author': 'a',
         'approvers': ['o'],
         'assignees': ['b'],
         'additions': 1,
         'deletions': 1,
         'attn': {'a': 'fix tests', 'b': 'needs review#0#0', 'o': 'needs approval'},
         'title': 'some fix',
         'labels': {'release-note-none': 'orange'},
         'head': 'abcdef',
         'needs_rebase': False,
         'status': {'e2e': ['failure', None, 'stuff is broken']},
         'xrefs': ['/bucket/job/123'],
         'milestone': 'v1.10',
     }))
Example #47
def home():
	form = ImageForm()
	if request.method == 'POST':
		image_file = form.image.data
		extension = os.path.splitext(image_file.filename)[1]
		filepath = os.path.join(UPLOAD_FOLDER, \
			datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S%f')) + extension
		image_file.save(filepath)
		pre_process(filepath).save(filepath)

		image_files = [filepath]
		classifications = classifier.classify(
			caffemodel=CAFFE_MODEL, 
			deploy_file=DEPLOY_FILE, 
			image_files=image_files, 
			labels_file=LABELS_FILE,
			mean_file=MEAN_FILE, 
			use_gpu=True
		)

		return render_template('show.html', classifications=classifications)
	else:
		return render_template('home.html')
Example #48
 def test_classify(self):
     # A quick integration test to ensure that all the sub-parts are included.
     # If this test fails, a smaller unit test SHOULD fail as well.
     self.assertEqual(classifier.classify([
             ('pull_request', {
                 'pull_request': {
                     'state': 'open',
                     'user': {'login': '******'},
                     'assignees': [{'login': '******'}],
                     'title': 'some fix',
                     'head': {'sha': 'abcdef'},
                     'additions': 1,
                     'deletions': 1,
                 }
             }),
             make_comment_event(1, 'k8s-bot',
                 'failure in https://k8s-gubernator.appspot.com/build/bucket/job/123/'),
             ('pull_request', {
                 'action': 'labeled',
                 'label': {'name': 'release-note-none', 'color': 'orange'},
             })
         ], {'e2e': ['failure', None, 'stuff is broken']}
     ),
     (True, True, ['a', 'b'],
      {
         'author': 'a',
         'assignees': ['b'],
         'additions': 1,
         'deletions': 1,
         'attn': {'a': 'fix tests', 'b': 'needs review'},
         'title': 'some fix',
         'labels': {'release-note-none': 'orange'},
         'head': 'abcdef',
         'needs_rebase': False,
         'status': {'e2e': ['failure', None, 'stuff is broken']},
         'xrefs': ['/bucket/job/123'],
     }))
Example #49
def draw_box(img, result):
    img_w, img_h = img.size

    # calculate the tightest valid boundary for this image
    tvb_x1 = 0  # for width
    tvb_x2 = 0
    tvb_y1 = 0  # for height
    tvb_y2 = 0
    for i in range(0, len(result)):
        rbtx = result[i]["boundingbox"]["tl"]["x"]
        rbty = result[i]["boundingbox"]["tl"]["y"]
        rbsw = result[i]["boundingbox"]["size"]["width"]
        rbsh = result[i]["boundingbox"]["size"]["height"]
        tvb_y1 = max(tvb_y1, -(rbty - ABOVE_H - WINDOW_H))
        tvb_x1 = max(tvb_x1, -(rbtx + rbsw / 2 - WINDOW_W / 2))
        tvb_x2 = max(tvb_x2, rbtx + rbsw / 2 + WINDOW_W / 2 - img_w)

    bg = Image.new("RGBA", (int(img_w + tvb_x1 + tvb_x2), int(img_h + tvb_y1)), (255, 255, 255, 255))
    bg_w, bg_h = bg.size
    offset = (int(tvb_x1), int(tvb_y1))
    bg.paste(img, offset)
    img = bg
    offset = {"x": offset[0], "y": offset[1]}

    draw = ImageDraw.Draw(img)

    prandom_init()
    attrs = []
    for i in range(0, len(result)):
        attrs.append(classifier.classify(result[i]))

        # An algorithm to ensure diversity of attributes:
        #   if several people share the same best attribute, pick one of
        #   them at random and advance it to its next-best attribute.
        #   Repeat until the chosen attributes are all distinct.
    np = len(result)
    ca = []
    for i in range(0, np):
        ca.append(0)
    while True:
        """
		print "======================"
		for i in range(0, np):
			print attrs[i][ca[i]][0]
		print "======================="
		"""
        stop = True
        for i in range(0, np):
            # get the index of the best attribute
            idxi = attrs[i][ca[i]][2]
            eq = [i]
            for j in range(i + 1, np):
                idxj = attrs[j][ca[j]][2]
                if idxi == idxj:
                    eq.append(j)
            if len(eq) > 1:
                s = int(prandom() * len(eq))
                ca[eq[s]] += 1
                stop = False
                break
        if stop:
            break

    for i in range(0, len(result)):
        text_fn = LABEL_FN + str(attrs[i][ca[i]][2]) + ".png"
        img = draw_single_box(img, draw, result[i]["boundingbox"], offset, Image.open(text_fn))
    return img
Example #50
def driver(filename, n):

    X, y, X_hold = pullData.feed_clf(filename)
    y_p = classifier.classify(X, y, X_hold)
    for i in range(len(y_p)):
        print X_hold[i], y_p[i]
Example #51
import urllib
import xml.etree.ElementTree as ET
from apscheduler.schedulers.blocking import BlockingScheduler
from sklearn import svm, linear_model
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
import fetcher
import xml_parser
import classifier

file_handler_match_id = open('match_ids.txt','r')
file_handler_match_id_manu = open('post_mid_term_ids.txt','r')
#file_handler_features = open('match_features.csv','a')
file_handler_features = open('feature_vectors.csv','r')

file_handler_features_heroes = open('bigram_features.csv','r')

'''
fetcher.get_match_recursively()
apsched.add_job(fetcher.get_match_recursively, trigger='interval', seconds=1200)
apsched.start() # will block
'''

#x = fetcher.populate_match_details()

classifier.classify(file_handler_features_heroes,file_handler_features)

#fetcher.get_xml_games(file_handler_match_id_manu)


Example #52
	def get(self, addr):
		a = Address(addr)
		a.calc()
		self.response.out.write(classifier.classify(a))
		self.response.out.write("<br/>")
		self.response.out.write(classifier.extract_features(a))
Example #53
def _echo(bot, update):
    print('Got a message!')
    print("I't is a text one!")
    response = bot.dialog.conversation(
        bot.dialog_id,
        update.message.text, bot.client_id,
        bot.conversation_id)
    print(response)
    text = response['response']
    print('Got a response from Dialog')

    print('Checking for profile variables')
    variables = bot.dialog.get_profile(
        bot.dialog_id, bot.client_id)
    print('Profile variables ' + str(variables))

    got_twitter = variables and variables['name_values'] \
            and variables['name_values'][0] \
            and variables['name_values'][0]['name'] \
            and variables['name_values'][0]['name'] == 'TwitterAccount' \
            and variables['name_values'][0]['value'] \
            and variables['name_values'][0]['value'].startswith('@')

    if got_twitter:
        for message in text:
            bot.sendMessage(update.message.chat_id, text=str(message))

        print('It\'s twitter!')

        tweets = get_all_tweets(update.message.text[1:])
        if not tweets:
            text = ['Parsing error occured!']
        else:
            print(tweets[:2])
            joined_text = ''.join(tweets)
            print(joined_text)
            feature_vector = get_personality_vector(joined_text)
            print(feature_vector)
            if feature_vector is not None:
                print(type(feature_vector))
                print(feature_vector.shape)
                recommendation = classify(feature_vector)
                print(recommendation)

                if recommendation:
                    print(recommendation)
                    print('Updating profile variables')
                    response_profile = bot.dialog.update_profile(bot.dialog_id,
                                                                 {'UniversityName': recommendation['name'],
                                                                  'UniversityWebsite': recommendation['url']},
                                                                 bot.client_id)
                    print('Sending continue constant to conversation')
                    response = bot.dialog.conversation(bot.dialog_id,
                                                       CONTINUE_CONSTANT, bot.client_id,
                                                       bot.conversation_id)
                    text = response['response']
            else:
                text = ["Twitter don't pass minimum requirement of 100 words. Specify another account"]
    print('Sending a message')
    print(str(text))
    for message in text:
        bot.sendMessage(update.message.chat_id, text=str(message))

# else:
#     bot.sendMessage(update.message.chat_id, text="Don't know! Rly, help me and try again!")
    print('Message sent!')
Beispiel #54
0
def main(argv):
    inputfile = ''
    outputfile = ''
    modelfile = ''

    if len(sys.argv) == 5:
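        # Training mode: extract features, build the tf-idf lexicons, compute similarity scores,
        # and train/save the classifier model.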
        try:
            opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
        except getopt.GetoptError:
            print 'error'
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                print 'main.py -i /path/to/training/corpus -o path/to/output/directory'
                sys.exit()
            elif opt in ("-i", "--ifile"):
                inputfile = arg
            elif opt in ("-o", "--ofile"):
                outputfile = arg

        print 'Input file is', inputfile
        print 'Output file is', outputfile

        feature_extractor.write_to_file(inputfile, outputfile)

        print("creating lexicon for training")
        lexicon_8char = feature_extractor.tfidf_lexicon_8char(inputfile)
        lexicon_3char = feature_extractor.tfidf_lexicon_3char(inputfile)
        lexicon_bigram = feature_extractor.tfidf_lexicon_2word(inputfile)
        lexicon_unigram = feature_extractor.tfidf_lexicon_unigram_word(inputfile)
        print("creating feature vector for training data")
        sim_score = comparison.sim_score(inputfile, lexicon_8char, lexicon_3char, lexicon_bigram, lexicon_unigram)
        print(sim_score)
        truth = feature_extractor.truth(inputfile)
        #print(truth)
        print("training the classifier")
        model_path = classifier.classify(sim_score, truth, outputfile)
        print("finish training")
        print("classifier model is saved in", model_path)

    elif len(sys.argv) == 7:
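        # Prediction mode: rebuild the training lexicons from the saved corpus path, score the
        # test corpus, and write predictions using the saved model.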
        try:
            opts, args = getopt.getopt(argv, "hi:m:o:", ["ifile=", "mfile=", "ofile="])
        except getopt.GetoptError:
            print 'error'
            sys.exit(2)
        for opt, arg in opts:
            if opt == '-h':
                print 'main.py -i /path/to/test/corpus -m path/to/classifier/model -o path/to/output/directory'
                sys.exit()
            elif opt in ("-i", "--ifile"):
                inputfile = arg
            elif opt in ("-m", "--mfile"):
                modelfile = arg
            elif opt in ("-o", "--ofile"):
                outputfile = arg

        print 'Input file is', inputfile
        print 'Model file is', modelfile
        print 'Output file is', outputfile
        file_path = ''
        for subdir, dirs, files in os.walk(modelfile):
            print("this is test")
            print(subdir)
            print (files)
            for file_ in files:
                print(file_)
                if file_ == 'train.txt':
                    file_path = subdir + os.path.sep + file_
        #print(file_path)
        opened_file = io.open(file_path, 'r')
        path_train = opened_file.read()
        print(path_train)
        lexicon_8char = feature_extractor.tfidf_lexicon_8char(path_train)
        lexicon_3char = feature_extractor.tfidf_lexicon_3char(path_train)
        lexicon_bigram = feature_extractor.tfidf_lexicon_2word(path_train)
        lexicon_unigram = feature_extractor.tfidf_lexicon_unigram_word(path_train)
        print("creating feature vector for test data")
        sim_score = comparison.sim_score(inputfile, lexicon_8char, lexicon_3char, lexicon_bigram, lexicon_unigram)
        print(sim_score)
        print("classify test data")
        output_path = classifier.classifier_predict(modelfile, sim_score, outputfile)
        print("finish")
        print "answer is saved in: ", output_path
            print("b0:", b0, "Total surplus:", total_surplus)
            ys[i] = total_surplus

        np.savez(surplus_file, b=bs, surplus=ys)

elif inspect_component is not None:
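    # Single-example inspection: classify one feature set against the learned templates and print the result.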
    #testing_digits, testing_labels = ag.io.load_mnist('testing', indices=slice(None, 10))
    #ag.io.load_mnist('testing', indices=inspect_component)
    #digits, labels = ag.io.load_mnist('testing')
    #digits = ag.util.zeropad(digits, (0, 2, 2))
    #digit, correct_label = digits[inspect_component], labels[inspect_component]

    features, correct_label = all_features[inspect_component], all_labels[inspect_component]

    # TODO: Does not work
    label, info = classify(features, all_templates, means, variances, samples, deformation=deform_type, correct_label=correct_label, debug_plot=PLOT)

    print("Digit: {0}".format(correct_label))
    print("Classified as: {0}".format(label))
    print(info['comp'])
else:
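    # Full evaluation pass over all examples, presumably accumulating counters such as
    # num_deformed and turned_correct (loop body continues beyond this excerpt).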
    N = len(all_features)
    c = 0
    num_deformed = 0
    num_contendors = 0
    incorrect_and_undeformed = 0
    turned_correct = 0
    turned_incorrect = 0
    #all_templates = np.clip(all_templates, eps, 1.0 - eps)
    for i, features in enumerate(all_features):
        additional = {}
Beispiel #56
0
import classifier
import sys
import yaml
from PIL import Image, ImageDraw, ImageColor
import colorsys

config     = yaml.safe_load(open("config.yml"))
w          = config['training']['patch_width']
h          = config['training']['patch_height']
input_path = sys.argv[1]

predictions = classifier.classify(input_path)

source = Image.open(input_path).convert('RGB')

heat_map = Image.new('RGB', source.size, (0,0,0))

draw = ImageDraw.Draw(heat_map, "RGBA")
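# Paint one semi-transparent white rectangle per predicted patch; the alpha channel encodes the prediction score.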
for prediction, x, y in predictions:
    draw.rectangle(
        [x - w/2, y - h/2, x + w/2, y + h/2],
        fill=(255,255,255, int(prediction * 256))
    )

data = heat_map.load()
source_data = source.load()
for x in range(heat_map.size[0]):
    for y in range(heat_map.size[1]):
        h, l, s = colorsys.rgb_to_hls(
            source_data[x, y][0] / 255.0,
            source_data[x, y][1] / 255.0,
Beispiel #57
0
 def POST(self):
     cat = classifier.classify(web.input()['message'])
     print(cat)
     return cat
Beispiel #58
0
def classify(item):
  return classifier.classify(item)
Beispiel #59
0
mid_right = push(np_edges, 0, -1)
lower_left = push(np_edges, -1, 1)
lower_center = push(np_edges, -1, 0)
lower_right = push(np_edges, -1, -1)
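# Vectorise find_orientation so it can be applied element-wise across the eight shifted neighbour maps.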
vfunc = np.vectorize(find_orientation)

orientations = vfunc(upper_left, upper_center, upper_right, mid_left, mid_right, lower_left, lower_center, lower_right)

# Create the area to draw the classification results
canvas_height = 50
canvas_width = 150
canvas_offset = '%d,%d' % (thumbnail_width - canvas_width / 2, thumbnail_height - canvas_height / 2)
print canvas_offset
canvas = Tkinter.Canvas(root, offset=canvas_offset, width=canvas_width, height=canvas_height, bg='white')
canvas.pack()

# Call the classifier to detect what kind of object this is
from classifier import ObjectClassifier
classifier = ObjectClassifier()
result = classifier.classify(np_edges, orientations)

canvas.create_text(canvas_width / 2, canvas_height/2, text=str(result) + '!')

print 'Pixels:'
print np_edges

print 'Orientations:'
print orientations

root.mainloop() # wait until user clicks the window
Beispiel #60
0
import cv2
from PIL import Image, ImageDraw, ImageFont

from preco.TImage import TImage

from classifier import classify


if __name__ == '__main__':
    files_list = ['1-2.jpg', '3-1.jpg', '5-1.jpg', '5-2.jpg', '7-1.jpg', '8-1.jpg', '8-2.jpg']
    for file in files_list:
        timage = TImage(file)
        cv2.imshow('image', timage.img)
        cv2.waitKey(0)
        #img = timage.img
        #img = cv2.resize(timage.img, (28, 28));
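        # Resize to 28x28, flatten row-major, and scale pixel values to [0, 1] before classification.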
        z = [px / 255.0 for px in cv2.resize(timage.img, (28, 28)).flatten('C').tolist()]
        probs = classify(z)
        max_value = max(probs)
        #print probs
        img = Image.open(file)
        draw = ImageDraw.Draw(img)
        
        if max_value < 0.5:
            s = 'ERROR'
        else:
            s = 'DIGIT: %i' % probs.index(max_value)
        print s
        font = ImageFont.truetype("ARIAL.TTF", 20)
        draw.text((0, 0), s, (0, 255, 0), font=font)
        img.save('sample-out.jpg')
        cv2.imshow('image', TImage('sample-out.jpg').img)
        cv2.waitKey(0)