Example #1
0
    def show(id):
        """Return a paper's abstract with known terms marked up.

        The abstract of the paper with the given ``id`` is tokenised into
        single words and adjacent word pairs; plural forms (via ``p.plural``)
        and first-letter case variants are added, and the resulting candidates
        are looked up as ``Term`` names.  Every occurrence of a found term, or
        of its plural, is then cut out of the abstract and replaced by a copy
        of the serialized term carrying an extra ``'here'`` key with the text
        exactly as it appears in the abstract.

        Matching is case-insensitive and substring based (no word boundaries).

        Returns:
            ``jsonify(response=parts)`` where ``parts`` is a list of plain
            strings and term dicts in reading order, or
            ``jsonify(response=[], error=True)`` when anything goes wrong
            (the exception is logged as a warning).
        """
        try:
            paper = Paper.objects(id=id).get()
            abstract = paper.abstract

            # Candidate names: words, their plurals, adjacent pairs of those,
            # the pairs' plurals, and finally every candidate with the case
            # of its first letter flipped.
            tokens = ''.join(c for c in abstract if c.isalnum() or c.isspace()).split()
            tokens.extend([p.plural(token) for token in tokens])
            two_word = [" ".join(tokens[i:i + 2]) for i in range(len(tokens) - 1)]
            two_word_plural = [p.plural(w) for w in two_word]
            tokens.extend(two_word)
            tokens.extend(two_word_plural)
            tokens.extend([t[0].upper() + t[1:] if t[0].islower() else t[0].lower() + t[1:]
                           for t in tokens])
            terms = [term.serialize() for term in Term.objects(name__in=tokens)]

            parts = [abstract]
            for term in terms:
                name = term['name']
                # Longest alternative first so the plural ("genes") is
                # preferred over the singular it contains ("gene").
                alternatives = sorted(
                    {alt for alt in (name, p.plural(name)) if alt},
                    key=len,
                    reverse=True,
                )
                if not alternatives:
                    continue
                # Names are literal text, not patterns, hence re.escape.  The
                # single capturing group makes re.split keep the matched text.
                pattern = re.compile(
                    '(' + '|'.join(re.escape(alt) for alt in alternatives) + ')',
                    re.I,
                )

                # Build a fresh list rather than splicing into the one being
                # iterated, so every text segment is scanned for this term.
                annotated = []
                for part in parts:
                    if not isinstance(part, str):
                        # Already a marked-up term from an earlier pass.
                        annotated.append(part)
                        continue
                    for index, piece in enumerate(pattern.split(part)):
                        if index % 2:
                            # Odd positions hold the captured match itself.
                            term_here = term.copy()
                            term_here['here'] = piece
                            annotated.append(term_here)
                        else:
                            annotated.append(piece)
                parts = annotated
            return jsonify(response=parts)
        except Exception as e:
            logging.warning(e)
            return jsonify(response=[], error=True)
Example #2
0
def recommend_from_user(user):
    """Build a weighted bag of keywords describing what *user* reads.

    Scores are accumulated from three sources:

    * each word of an abstract that is not in ``common_words`` scores 1 per
      paper it occurs in;
    * each ``Term`` whose name equals a token from a subscription's abstracts
      (including ``p.plural`` forms and first-letter case variants) scores
      that subscription's paper count;
    * each search-history keyword of the user scores 100 per history entry.

    A paper whose abstract cannot be processed is logged and skipped.

    Args:
        user: object exposing ``subscriptions`` (each with ``papers``) and
            ``id``.

    Returns:
        A list of ``(keyword, score)`` tuples in first-seen order (unsorted).
    """
    bag = defaultdict(int)
    for subscription in user.subscriptions:
        papers = subscription.papers
        num_paper = len(papers)
        all_tokens = set()

        for paper in papers:
            try:
                abstract = paper.abstract
                words = {x.strip().strip(",.").lower() for x in abstract.split()}
            except Exception as e:
                # Best effort: a missing or unreadable abstract must not
                # abort the whole recommendation.
                logging.warning(e)
                continue

            for word in words:
                if word not in common_words:
                    bag[word] += 1

            # Candidate term names; only single words are looked up here.
            tokens = ''.join(c for c in abstract if c.isalnum() or c.isspace()).split()
            tokens.extend([p.plural(token) for token in tokens])
            tokens.extend([t[0].upper() + t[1:] if t[0].islower() else t[0].lower() + t[1:]
                           for t in tokens])
            all_tokens.update(tokens)

        # NOTE(review): 'toes' is presumably the pluralised common word "to"
        # and would match an unrelated term - confirm.  discard() removes each
        # spelling independently and is a no-op when it is absent.
        all_tokens.discard('toes')
        all_tokens.discard('Toes')

        for term in Term.objects(name__in=all_tokens):
            bag[term.name] += num_paper

    for history in SearchHistory.objects(user=user.id):
        bag[history.item.keyword] += 100

    return list(bag.items())
Example #3
0
 def parse():
     """Import GO terms from ``resources/go.obo`` and link them to parents.

     Pass 1 reads the OBO file stanza by stanza.  A stanza ends at a blank
     line, at a ``[Term]`` header, or at end of file; if its ``id:`` starts
     with ``GO`` it is saved as a ``Term`` with ``source="GO"``.  The ids from
     its ``is_a:`` lines are stored in ``tree_number_list``.

     Pass 2 walks every saved GO term and, for each id in its
     ``tree_number_list``, pushes the ``Term`` with that ``oid`` onto the
     term's ``ancestors``.  Lookup/update failures are logged and skipped.

     Progress is printed as one dot per 100 terms in each pass.
     """
     def blank_stanza():
         # Fresh field set for the next stanza; the lists must not be shared.
         return {
             "name": None,
             "definition": None,
             "oid": None,
             "namespace": None,
             "tree_number_list": [],
             "synonyms": [],
         }

     def flush(stanza, saved):
         # Save the stanza when it carries a GO id; return the updated count.
         oid = stanza["oid"]
         if oid and oid.startswith("GO"):
             Term(source="GO", **stanza).save()
             saved += 1
             if saved % 100 == 0:
                 print(".", end='', flush=True)
         return saved

     print("Start to import GO terms")
     count = 0
     stanza = blank_stanza()
     with open("resources/go.obo", 'r') as f:
         for line in f:
             stripped = line.strip()
             if stripped == "[Term]" or not stripped:
                 count = flush(stanza, count)
                 stanza = blank_stanza()
             elif line.startswith('name:'):
                 stanza["name"] = stripped[6:]
             elif line.startswith('id:'):
                 stanza["oid"] = stripped[4:]
             elif line.startswith('def:'):
                 # The definition text is the first double-quoted span.
                 stanza["definition"] = line.split('"')[1]
             elif line.startswith('namespace:'):
                 stanza["namespace"] = stripped[11:]
             elif line.startswith('is_a:'):
                 # Keep the 10 characters after "is_a: " (the parent id);
                 # anything after them on the line is dropped.
                 stanza["tree_number_list"].append(stripped[6:16])
             elif line.startswith('synonym:'):
                 stanza["synonyms"].append(line.split('"')[1])
     # The file may end without a trailing blank line; do not lose the
     # stanza that was still being collected.
     count = flush(stanza, count)
     print("\nFinished importing GO terms")

     print("Start to fetch ancestor objects")
     count = 0
     for term in Term.objects(source="GO"):
         for number in term.tree_number_list:
             try:
                 ancestor = Term.objects(oid=number).get()
                 term.update(push__ancestors=ancestor)
             except Exception as e:
                 logging.warning(e)
                 logging.warning(number)
         count += 1
         if count % 100 == 0:
             print(".", end='', flush=True)