Beispiel #1
0
def get_tr_dicts_and_ids():
    """Collect CORD19 metadata for the text refs found in the DB.

    Makes sure the metadata file is available, fetches the unique text
    ref objects together with the metadata dict, and hands both to
    ``cord19_metadata_for_trs``.

    Returns
    -------
    tuple
        The pair ``(tr_dicts, multiple_tr_ids)`` produced by
        ``cord19_metadata_for_trs``.
    """
    # The metadata file is downloaded only if it is not already in the
    # data directory
    download_metadata()
    # Text ref objects from the DB that correspond to the CORD19 articles
    cord_text_refs = get_unique_text_refs()
    cord_metadata = get_metadata_dict()
    result = cord19_metadata_for_trs(cord_text_refs, cord_metadata)
    # Unpack explicitly so that exactly two values are returned as a tuple
    tr_dicts, multiple_tr_ids = result
    return tr_dicts, multiple_tr_ids
Beispiel #2
0
def get_cord_info():
    """Return the module-level metadata lookups keyed by DOI and by PMID.

    The two module-level dicts are filled from ``get_metadata_dict()``
    whenever at least one of them is still empty; once both contain
    entries they are returned as they are.

    Returns
    -------
    tuple
        ``(_cord_by_doi, _cord_by_pmid)``. DOI keys are upper-cased,
        PMID keys are stored exactly as found in the metadata entry.
    """
    global _cord_by_doi, _cord_by_pmid
    # Both lookups already populated: nothing to load
    if _cord_by_doi and _cord_by_pmid:
        return (_cord_by_doi, _cord_by_pmid)
    for entry in get_metadata_dict():
        doi = entry.get('doi')
        if doi:
            # Upper-case the DOI so the key has a single normalized case
            _cord_by_doi[doi.upper()] = entry
        pubmed_id = entry.get('pubmed_id')
        if pubmed_id:
            _cord_by_pmid[pubmed_id] = entry
    return (_cord_by_doi, _cord_by_pmid)
Beispiel #3
0
import re
import csv
from covid_19.preprocess import get_metadata_dict, get_zip_texts_for_entry, \
                                get_metadata_df, get_all_texts
from indra_db.util import get_db

# CSV file to read document identifiers from; the path is relative to the
# current working directory.
covid_docs_file = '../covid_docs_ranked_corona.csv'
# Values of the fifth column (index 4) of every row in the CSV file. No row
# is skipped, so a header row, if the file has one, is included as well.
covid_pmids = set()
# newline='' is what the csv module asks for, so that the reader itself can
# interpret line endings, including newlines embedded in quoted fields.
with open(covid_docs_file, 'rt', newline='') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        pmid = row[4]
        covid_pmids.add(pmid)

# Metadata as returned by covid_19.preprocess.get_metadata_dict
md = get_metadata_dict()

# Character class matching a single upper-case letter out of the 20
# one-letter codes listed here (presumably the amino acid codes, given the
# variable name).
aa_reg = '[ACDEFGHIKLMNPQRSTVWY]'
# Pattern for <letter><digits><letter> surrounded by whitespace on both
# sides, e.g. " A123T ". Raw strings are used so that \s and \d reach the
# regex engine as written instead of depending on Python passing invalid
# string escapes through (which emits a warning on recent versions).
mut_reg = r'\s+' + aa_reg + r'\d+' + aa_reg + r'\s+'
print(mut_reg)

# Lower-case three-letter codes (presumably amino acid abbreviations, given
# the variable name).
aa_short = [
    'ala', 'arg', 'asn', 'asp', 'cys', 'gln', 'glu', 'gly', 'his', 'ile',
    'leu', 'lys', 'met', 'phe', 'pro', 'ser', 'thr', 'trp', 'tyr', 'val'
]
# Alternation of all three-letter codes: 'ala|arg|...|val'
aa_short_reg = '|'.join(aa_short)
# One of the three-letter codes immediately followed by 2 to 5 digits. The
# group is non-capturing, and the raw string keeps \d as a regex escape
# rather than an invalid Python string escape.
aa_seq_reg = r'(?:%s)\d{2,5}' % aa_short_reg
print(aa_seq_reg)

ignore_list = (
    'Y2H',  # Yeast two-hybrid
    'C3H',  # Mouse strain