Code example #1
File: search_controller.py  Project: 764664/BioSE
    # Requires: math, flask.jsonify, flask_login, and the SearchItem,
    # SearchHistory, and User documents (their import paths are not shown
    # in this excerpt).
    def search(args):
        keyword = args.get('keyword')

        # Store history in database: fetch the SearchItem for this keyword,
        # creating it on first search (get-or-create in a single query)
        if SearchItem.objects(keyword=keyword).count() == 0:
            search_item = SearchItem(keyword=keyword)
        else:
            search_item = SearchItem.objects(keyword=keyword).get()
        search_item.count += 1
        search_item.save()

        # Load cache
        # search_results = g.get('search_results', None)
        # if search_results is None:
        #     g.search_results = {}
        #     search_results = g.search_results
        #
        # search_id_to_results = g.get('search_id_to_results', None)
        # if search_id_to_results is None:
        #     g.search_id_to_results = {}
        #     search_id_to_results = g.search_id_to_results

        query_result = PaperProcessor(keyword)
        papers = query_result.papers_array

        # paper_ids = [x["DBID"] for x in papers]
        # search_item.update(add_to_set__papers=paper_ids)

        if flask_login.current_user.is_authenticated:
            search_history = SearchHistory(item=search_item,
                                           user=User.objects(id=flask_login.current_user.id).get(),
                                           papers=papers)
        else:
            search_history = SearchHistory(item=search_item,
                                           papers=papers)
        search_history.save()

        # # Word bag
        # bag = AbstractProcessor().process_list(return_list)
        # words = [[y, bag[y]] for y in sorted(list(bag.keys()), key=lambda x: bag[x], reverse=True)[:30]]

        # Return result
        return jsonify(
            response=str(search_history.id),
            meta_info={
                'page_count': math.ceil(len(papers)/RESULTS_PER_PAGE)
            }
        )
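
For context, `search` reads `args` like Flask's query-string mapping, so it is presumably exposed through a route. Below is a minimal wiring sketch, treating `search` as a plain function for illustration; the route path and endpoint name are assumptions, not part of the BioSE source.

from flask import Flask, request

app = Flask(__name__)

@app.route('/search')
def search_endpoint():
    # Hypothetical route: forwards the query-string mapping, matching the
    # args.get('keyword') access pattern in search() above.
    return search(request.args)
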
Code example #2
File: paper_processor.py  Project: 764664/BioSE
    def generate_papers_array(self):
        # Record every paper returned for this keyword on the SearchItem
        # (add_to_set avoids duplicates across repeated searches)
        paper_ids = [paper["DBID"] for paper in self.papers.values()]
        search_item = SearchItem.objects(keyword=self.keyword).get()
        search_item.update(add_to_set__papers=paper_ids)
        # self.papers_array = list(self.papers.values())
        # logging.info("Have {} papers in total.".format(len(self.papers_array)))
        # self.papers_array.sort(key=lambda x: x["Score"])
        # self.papers_array.reverse()
        # for index, paper in enumerate(self.papers_array):
        #     paper["ID"] = index
        search_item.reload()
        # Prefer the learned ranking; fall back to the stored order when no
        # model is available or scoring fails
        self.papers_array = self.get_scores()
        if not self.papers_array:
            self.papers_array = search_item.papers
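
A quick usage sketch tying examples #1 and #2 together: `search` creates the `SearchItem` for the keyword before constructing `PaperProcessor`, which is what lets `generate_papers_array` call `.get()` safely. The snippet below is illustrative only; it assumes a live MongoDB connection and that the processor populates `self.papers` and calls `generate_papers_array` during construction.

# Illustrative only -- assumes the models are connected to MongoDB
processor = PaperProcessor("crispr")   # keyword is an arbitrary example
for paper in processor.papers_array[:3]:
    print(paper["DBID"])               # DBID is the key used above
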
Code example #3
File: paper_processor.py  Project: 764664/BioSE
    def get_scores(self):
        try:
            item = SearchItem.objects(keyword=self.keyword).get()
            if not item.model:
                # No trained model for this keyword yet; let the caller
                # fall back to the stored paper order. (The original left
                # regressor undefined here, raising a NameError that the
                # broad except silently swallowed.)
                return None
            regressor = pickle.loads(item.model)
            papers = item.papers
            x = [vectorize_paper(paper) for paper in papers]
            y = regressor.predict(x)
            # return papers
            # Reorder papers by descending predicted score. A list
            # comprehension replaces the original itemgetter(*indices) call,
            # which returned a bare element instead of a sequence when only
            # one paper was present.
            order = [idx for idx, score in sorted(enumerate(y), key=lambda t: t[1], reverse=True)]
            return [papers[i] for i in order]
        except Exception as e:
            logging.debug(e)
            return None

    # def add_missing_info(self):
    #     self.add_journal_if_self()

    # # TODO journal IF
    # def add_journal_if_self(self):
    #     for k,v in self.papers.items():
    #         if 'Journal' not in v or not v['Journal']:
    #             v["Journal_IF"] = 0
    #             continue
    #         try:
    #             stripped_journal_name = re.sub('[\W_]+', '', v["Journal"].upper())
    #             v["Journal_IF"] = Journal.get(name==stripped_journal_name).impact_factor
    #         except Exception as e:
    #             try:
    #                 if len(stripped_journal_name) >= 16:
    #                     v["Journal_IF"] = Journal.get(
    #                         name.startswith(stripped_journal_name[:16])).impact_factor
    #                 if len(stripped_journal_name) >= 12:
    #                     v["Journal_IF"] = Journal.get(name.startswith(stripped_journal_name[:12])).impact_factor
    #                 elif len(stripped_journal_name) >= 8:
    #                     v["Journal_IF"] = Journal.get(name.startswith(stripped_journal_name[:8])).impact_factor
    #                 elif len(stripped_journal_name) >= 4:
    #                     v["Journal_IF"] = Journal.get(name.startswith(stripped_journal_name[:4])).impact_factor
    #                 else:
    #                     v["Journal_IF"] = 0
    #             except Exception as e:
    #                 v["Journal_IF"] = 0

    # @staticmethod
    # def add_journal_if(paper_list):
    #     for paper in paper_list:
    #         if 'Journal' not in paper or not paper['Journal']:
    #             paper["Journal_IF"] = 0
    #             continue
    #         try:
    #             stripped_journal_name = re.sub('[\W_]+', '', paper["Journal"].upper())
    #             paper["Journal_IF"] = Journal.get(Journal.title==stripped_journal_name).impact_factor
    #         except DoesNotExist:
    #             try:
    #                 if len(stripped_journal_name) >= 16:
    #                     paper["Journal_IF"] = Journal.get(
    #                         Journal.title.startswith(stripped_journal_name[:16])).impact_factor
    #                 if len(stripped_journal_name) >= 12:
    #                     paper["Journal_IF"] = Journal.get(Journal.title.startswith(stripped_journal_name[:12])).impact_factor
    #                 elif len(stripped_journal_name) >= 8:
    #                     paper["Journal_IF"] = Journal.get(Journal.title.startswith(stripped_journal_name[:8])).impact_factor
    #                 elif len(stripped_journal_name) >= 4:
    #                     paper["Journal_IF"] = Journal.get(Journal.title.startswith(stripped_journal_name[:4])).impact_factor
    #                 else:
    #                     paper["Journal_IF"] = 0
    #             except DoesNotExist:
    #                 paper["Journal_IF"] = 0

    # def ranking(self):
    #     model = self.check_model()
    #     if model:
    #         clf = model[0]
    #         number_clicks = model[1]
    #         maximum_ml_score = -1
    #         for k,v in self.papers.items():
    #             if "Journal_IF" in v and "Year" in v:
    #                 x = [[v["Year"], v["Journal_IF"]]]
    #                 score_ml = clf.predict(x)[0]
    #                 v["Score_ML"] = score_ml
    #                 if score_ml > maximum_ml_score:
    #                     maximum_ml_score = score_ml
    #                 weight = 1 - math.pow(0.5, 0.1*number_clicks)
    #                 v["Weight"] = weight
    #         for k,v in self.papers.items():
    #             if "Score_ML" in v:
    #                 v["Score_ML"] *= 1 / maximum_ml_score
    #                 # logging.debug("{}: {}".format(v["Title"], v["Score_ML"]))
    #                 v["Score"] = v["Score"]*(1-v["Weight"]) + v["Score_ML"]*v["Weight"]
    #     else:
    #         pass

    # TODO: models
    # def check_model(self):
    #     ALWAYS_CREATE_NEW_MODEL_AND_DONT_SAVE = True
    #     if ALWAYS_CREATE_NEW_MODEL_AND_DONT_SAVE:
    #         new_model = self.train_model()
    #         return new_model
    #     try:
    #         search_term = SearchTerm.get(SearchTerm.keyword == self.keyword)
    #         model = Model.get(Model.search_term == search_term)
    #         if datetime.datetime.now() - model.last_modified > datetime.timedelta(days = 1):
    #             new_model = self.train_model()
    #             if new_model:
    #                 model.model = pickle.dumps(new_model)
    #                 model.last_modified = datetime.datetime.now()
    #                 model.save()
    #             return new_model
    #         else:
    #             return pickle.loads(model.model)
    #     except DoesNotExist:
    #         new_model = self.train_model()
    #         if new_model:
    #             Model.create(
    #                 search_term = SearchTerm.get(SearchTerm.keyword == self.keyword),
    #                 model = pickle.dumps(new_model)
    #             )
    #         return new_model

    # def train_model(self):
    #     x, y = [], []
    #     #clicks = SearchTerm.get(SearchTerm.keyword == self.keyword).clicks
    #     clicks = Click.select(Paper, Click).join(Paper).switch(Click).join(SearchTerm).where(SearchTerm.keyword == self.keyword)
    #     if clicks.count() == 0:
    #         return False
    #     for click in clicks:
    #         x.append(
    #             [
    #                 click.paper.year,
    #                 click.paper.journal_if
    #             ]
    #         )
    #         y.append(click.click_count)
    #     #clf = svm.SVR(kernel="rbf")
    #     #clf.fit(x, y)
    #     #return [clf, sum(y)]
    #     gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
    #     gp.fit(x, y)
    #     return [gp, sum(y)]
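
`get_scores` (example #3) relies on two pieces that never appear in these excerpts: a `vectorize_paper` feature function and a pickled regressor stored in `SearchItem.model`. The sketch below shows one way the model side could look, based on the `[Year, Journal_IF]` features and click-count targets in the commented-out `train_model` above. It swaps the long-deprecated `gaussian_process.GaussianProcess` for scikit-learn's current `GaussianProcessRegressor`, and every name outside the excerpts is an assumption, not BioSE's actual code.

import pickle

from sklearn.gaussian_process import GaussianProcessRegressor

def vectorize_paper(paper):
    # Feature vector mirroring the dead train_model code: publication year
    # and journal impact factor, defaulting to 0 when a key is missing
    return [paper.get("Year", 0), paper.get("Journal_IF", 0)]

def train_and_store(search_item, clicked_papers, click_counts):
    # clicked_papers / click_counts stand in for the Click join in the
    # commented-out code; both are hypothetical inputs
    x = [vectorize_paper(p) for p in clicked_papers]
    gp = GaussianProcessRegressor()
    gp.fit(x, click_counts)
    # get_scores unpickles SearchItem.model and calls .predict() on it
    search_item.model = pickle.dumps(gp)
    search_item.save()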