Example no. 1
0
def main():
    '''
    INPUT: None
    OUTPUT: Recommendations sorted in order of relevance

    Uses the TextParser and Recommender classes to generate resource recommendations given a user's Quora data
    '''
    # Build the master resource DataFrame and persist it for later runs.
    read = TextParser()
    read.assemble_df()
    with open("data/master_df.pkl", "wb") as f:
        pickle.dump(read.df, f)

    # Pickles must be read in binary mode ('rb'); the original text-mode
    # open breaks under Python 3 and on Windows. `with` guarantees the
    # handle is closed even if load/parse raises.
    with open('data/quora_data.pkl', 'rb') as quora_user:
        quora = pickle.load(quora_user)
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    with open("data/clean_quora.pkl", "wb") as f:
        pickle.dump(clean_quora, f)

    # Make recommendations
    rec = Recommender()
    rec.vectorize()
    top_ten_ind = rec.recommend()
    # .ix is deprecated/removed in modern pandas; .loc is the label-based
    # replacement (equivalent here assuming the default integer index —
    # TODO confirm rec.recommend() returns index labels, not positions).
    recs = read.df.loc[top_ten_ind]
    recs = recs.reset_index()
    # list(...) keeps this correct on Python 3, where map() is lazy.
    recs['img_link'] = list(map(get_image, recs['title']))
    recs.loc[recs['type']=='course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    with open("data/recs.pkl", "wb") as f:
        pickle.dump(recs[0:5], f)
    # Single-argument print(...) is valid in both Python 2 and 3.
    print("These are your recommendations: \n")
    print(recs[['title', 'type', 'img_link']])
    return recs[['title', 'type', 'img_link']]
Example no. 2
0
def index_documents(options, args):
    """For each input path: parse with Tika, fall back to format-specific
    parsers when Tika yields no text, merge filesystem metadata, and print
    the merged metadata plus the extracted text.

    NOTE(review): the trailing exit() terminates the process after the
    first file — only args[0] is ever handled; looks like debug scaffolding.
    """
    for path in args:
        mime_type = TikaParser.get_mime_type(path)
        (text, meta) = TikaParser.parse(path)

        # Tika returned no text at all — try a specialised parser based
        # on the content type it detected.
        if not text.keys():
            content_type = meta['content_type']
            if 'text/plain' in content_type:
                (text, extra_meta) = TextParser.parse(path)
                meta.update(extra_meta)
            elif 'vnd.oasis.opendocument' in content_type:
                (text, extra_meta) = OpenDocumentParser.parse(path)
                meta.update(extra_meta)

        # Filesystem-level metadata always layers on top; its text part
        # is ignored.
        (_unused, fs_meta) = FsMetaParser.parse(path)
        meta.update(fs_meta)

        # Trust the originally detected MIME type over anything the
        # fallback parsers wrote into meta.
        meta['content_type'] = mime_type

        for key in meta:
            print("{}: {}".format(key, meta.get(key)))

        # if meta.get('content_type', '') == 'application/pdf':
        #     es_index(text, meta, doctype='pdf', options=options)

        print(text)
        exit()
Example no. 3
0
def recommend():
    '''
    Flask route handler: scrape the Quora profile URL submitted in the
    form, generate recommendations against the master DataFrame, and
    render the top twenty as a template.
    '''
    # NOTE(review): str(bytes) only round-trips on Python 2; on Python 3
    # this would yield "b'...'" — confirm the target runtime.
    user_data = str(request.form['user_input'].encode('utf-8'))

    # --- Drive to the given URL, scrape and generate recs -- #
    scraped = profile_crawl(user_data)
    quora = scraped['text']

    # Read and clean Quora dump recommendations
    read = TextParser()
    read.df = master_df
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    # `with` closes the handle even if pickling fails (original leaked it).
    with open("data/clean_quora.pkl", "wb") as f:
        pickle.dump(clean_quora, f)
    rec = Recommender()
    rec.vectorize()
    top_ten_ind = rec.recommend()
    # .ix is deprecated/removed in modern pandas; .loc is the label-based
    # replacement (equivalent assuming default integer index — TODO confirm).
    recs = read.df.loc[top_ten_ind]
    recs = recs.reset_index()
    # list(...) keeps this correct on Python 3, where map() is lazy.
    recs['img_link'] = list(map(get_image, recs['title']))
    recs.loc[recs['type']=='course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    recs = recs[0:20] # Get the top twenty recommendations
    return render_template('testing.html', data = recs[['title', 'img_link']].values)
Example no. 4
0
 def get_terms(self, query_text):
     """Tokenize *query_text* and map each term to its id in the term store."""
     tokens = TextParser.parse(query_text)
     return [self.term_store.get_id_for_term(tok) for tok in tokens]