def main():
    """Generate resource recommendations from a user's pickled Quora data.

    INPUT: None
    OUTPUT: DataFrame holding the 'title', 'type' and 'img_link' columns of
        the recommendations, sorted in order of relevance

    Uses the TextParser and Recommender classes. Side effects: writes
    data/master_df.pkl, data/clean_quora.pkl and data/recs.pkl (first five
    recommendations only) and prints the recommendations to stdout.
    """
    read = TextParser()
    read.assemble_df()
    # Context managers guarantee each pickle is flushed and closed before
    # the next step runs, instead of relying on garbage collection.
    with open("data/master_df.pkl", "wb") as master_file:
        pickle.dump(read.df, master_file)

    # Pickles are binary data, so read them in 'rb' mode. pickle.load can
    # execute arbitrary code: only load dumps that come from a trusted source.
    with open('data/quora_data.pkl', 'rb') as quora_file:
        quora = pickle.load(quora_file)

    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    # NOTE(review): Recommender() takes no arguments, so it presumably picks
    # the cleaned text up from this file -- confirm.
    with open("data/clean_quora.pkl", "wb") as clean_file:
        pickle.dump(clean_quora, clean_file)

    # Make recommendations
    rec = Recommender()
    # The return value was never used; the call is kept in case recommend()
    # depends on it having run.
    rec.vectorize()
    top_ten_ind = rec.recommend()
    # NOTE(review): .ix was removed in pandas 1.0. Replace it with .loc
    # (labels) or .iloc (positions) once the meaning of top_ten_ind relative
    # to the index of read.df has been confirmed.
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    # A list (not a lazy map object) so the assignment works on Python 2 and 3.
    recs['img_link'] = [get_image(title) for title in recs['title']]
    # Every course gets the same fixed logo instead of a looked-up image.
    recs.loc[recs['type'] == 'course', 'img_link'] = (
        'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    )
    with open("data/recs.pkl", "wb") as recs_file:
        pickle.dump(recs[0:5], recs_file)

    result = recs[['title', 'type', 'img_link']]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("These are your recommendations: \n")
    print(result)
    return result
def index_documents(options, args):
    """Parse the given documents and print their metadata and text.

    For each entry of ``args`` the MIME type and content are obtained from
    TikaParser. When Tika returns no text, a format-specific parser is tried
    based on the content type Tika reported. File-system metadata is merged
    in, and every metadata field plus the text is printed.

    ``options`` is currently unused because indexing is disabled.

    NOTE(review): the original indentation of this function was lost. The
    layout below (FsMetaParser step run for every document, ``exit()`` inside
    the loop as a debugging stop after the first document) is the most
    plausible reading -- confirm against the upstream file.
    """
    for target in args:
        detected_type = TikaParser.get_mime_type(target)
        text, meta = TikaParser.parse(target)

        if not text.keys():
            # Tika produced no text: fall back to a dedicated parser chosen
            # from the content type Tika reported.
            reported_type = meta['content_type']
            if 'text/plain' in reported_type:
                text, extra_meta = TextParser.parse(target)
                meta.update(extra_meta)
            elif 'vnd.oasis.opendocument' in reported_type:
                text, extra_meta = OpenDocumentParser.parse(target)
                meta.update(extra_meta)

        # Only the metadata half of the FsMetaParser result is used.
        _ignored, fs_meta = FsMetaParser.parse(target)
        meta.update(fs_meta)
        # The separately detected MIME type overrides whatever the parsers set.
        meta['content_type'] = detected_type

        for field in meta:
            value = meta.get(field)
            print("{}: {}".format(field, value))

        # NOTE: indexing is disabled for now. Previously PDFs were sent on via
        #   if meta.get('content_type', '') == 'application/pdf':
        #       es_index(text, meta, doctype='pdf', options=options)
        print(text)
        exit()
def recommend():
    """Scrape the submitted Quora profile and render its recommendations.

    Reads the 'user_input' field of the current request's form, crawls it
    with profile_crawl, cleans the scraped text with TextParser, asks
    Recommender for matching rows of the master DataFrame and renders
    'testing.html' with the title and image link of at most twenty of them.

    Side effect: overwrites data/clean_quora.pkl with the cleaned text.
    """
    user_data = str(request.form['user_input'].encode('utf-8'))

    # --- Drive to the given URL, scrape and generate recs --- #
    scraped = profile_crawl(user_data)
    quora = scraped['text']

    # Read and clean the scraped Quora text
    read = TextParser()
    read.df = master_df
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    # The context manager guarantees the pickle is flushed and closed before
    # the recommender runs, instead of relying on garbage collection.
    # NOTE(review): Recommender() takes no arguments, so it presumably picks
    # the cleaned text up from this file -- confirm.
    with open("data/clean_quora.pkl", "wb") as clean_file:
        pickle.dump(clean_quora, clean_file)

    rec = Recommender()
    # The return value was never used; the call is kept in case recommend()
    # depends on it having run.
    rec.vectorize()
    top_ten_ind = rec.recommend()
    # NOTE(review): .ix was removed in pandas 1.0. Replace it with .loc
    # (labels) or .iloc (positions) once the meaning of top_ten_ind relative
    # to the index of master_df has been confirmed.
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    # A list (not a lazy map object) so the assignment works on Python 2 and 3.
    recs['img_link'] = [get_image(title) for title in recs['title']]
    # Every course gets the same fixed logo instead of a looked-up image.
    recs.loc[recs['type'] == 'course', 'img_link'] = (
        'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    )
    recs = recs[0:20]  # Get the top twenty recommendations
    return render_template('testing.html',
                           data=recs[['title', 'img_link']].values)
def get_terms(self, query_text):
    """Return the term-store ids for the terms parsed out of ``query_text``.

    The text is split into terms by ``TextParser.parse``; each term is then
    looked up with ``self.term_store.get_id_for_term``. The result is a list
    with one id per parsed term, in parse order.
    """
    return [
        self.term_store.get_id_for_term(token)
        for token in TextParser.parse(query_text)
    ]