Python get_timestampの例、biothings.utils.common.get_timestamp Pythonの例

コード例 #1

0

ファイルを表示

ファイル: entrez_genomic_pos.py プロジェクト: SuLab/mygene.info

def get_ref_microbe_taxids():
    """
    Download the bacterial assembly summary from the NCBI genomes FTP site
    and collect the taxids of the reference/representative genomes.

    The summary is parsed as tab-separated text. A row is kept when its
    column at index 4 is 'reference genome' or 'representative genome'; the
    value of its column at index 5 is then recorded. The resulting list is
    also handed to ``dump`` together with the file name
    ``ref_microbe_taxids_<timestamp>.pyobj``.

    :return: list of taxid strings in file order (duplicates are kept)
    """
    import urllib.request
    import csv

    urlbase = 'ftp://ftp.ncbi.nlm.nih.gov'
    urlextension = '/genomes/refseq/bacteria/assembly_summary.txt'
    # Close the FTP response deterministically once the payload is read.
    with urllib.request.urlopen(urlbase + urlextension) as assembly:
        lines = assembly.read().decode().splitlines()
    datareader = csv.reader(lines, delimiter="\t")
    wanted = ('reference genome', 'representative genome')
    taxid = []

    for row in datareader:
        # Blank lines, '#' comment/header lines and truncated rows carry no
        # usable category/taxid columns; indexing them would raise IndexError.
        if len(row) < 6 or row[0].startswith("#"):
            continue
        if row[4] in wanted:
            taxid.append(row[5])

    ts = get_timestamp()
    dump(taxid, "ref_microbe_taxids_{}.pyobj".format(ts))

    return taxid

コード例 #2

0

ファイルを表示

ファイル: entrez_genomic_pos.py プロジェクト: SuLab/mygene.info

def get_ref_microbe_taxids():
    """
    Fetch the NCBI RefSeq bacterial assembly summary over FTP and return the
    taxids of the genomes flagged as reference or representative.

    Each tab-separated row whose column at index 4 equals 'reference genome'
    or 'representative genome' contributes the value of its column at
    index 5. The collected list is also passed to ``dump`` with the file name
    ``ref_microbe_taxids_<timestamp>.pyobj``.

    :return: list of taxid strings in file order (duplicates are kept)
    """
    import urllib.request
    import csv

    urlbase = 'ftp://ftp.ncbi.nlm.nih.gov'
    urlextension = '/genomes/refseq/bacteria/assembly_summary.txt'
    # Use a context manager so the connection is released even on errors.
    with urllib.request.urlopen(urlbase + urlextension) as assembly:
        text = assembly.read().decode()
    datareader = csv.reader(text.splitlines(), delimiter="\t")

    # Rows that are blank, start with '#', or have fewer than six columns
    # are ignored instead of raising IndexError on row[4] / row[5].
    taxid = [
        row[5]
        for row in datareader
        if len(row) > 5
        and not row[0].startswith("#")
        and row[4] in ('reference genome', 'representative genome')
    ]

    ts = get_timestamp()
    dump(taxid, "ref_microbe_taxids_{}.pyobj".format(ts))

    return taxid

コード例 #3

0

ファイルを表示

ファイル: differ.py プロジェクト: SuLab/biothings.api

def diff_worker_old_vs_new(id_list_old, new_db_col_names, batch_num,
                           diff_folder):
    """
    Find which ids of one batch of the old collection are gone from the new one.

    The ids in ``id_list_old`` are looked up in the backend created from
    ``new_db_col_names``; every id that is not among the returned documents
    is reported as a deletion. When there is at least one deletion, the diff
    is dumped to ``<diff_folder>/<batch_num>.pyobj`` and the summary gets a
    ``diff_file`` entry with the file's base name and md5 sum.

    :return: dict with "add", "update" and "delete" counts (add/update are
             always 0 here), plus "diff_file" when a diff file was written
    """
    backend = create_backend(new_db_col_names)
    found_docs = backend.mget_from_ids(id_list_old)
    found_ids = [doc['_id'] for doc in found_docs]
    removed_ids = list(set(id_list_old) - set(found_ids))
    diff_path = os.path.join(diff_folder, "%s.pyobj" % str(batch_num))
    payload = {
        'delete': removed_ids,
        'add': [],
        'update': [],
        'source': backend.target_name,
        'timestamp': get_timestamp()
    }
    summary = {"add": 0, "update": 0, "delete": len(removed_ids)}

    # Nothing was deleted in this batch: no diff file is produced.
    if not removed_ids:
        return summary

    dump(payload, diff_path)
    # compute md5 so when downloaded, users can check integrity
    checksum = md5sum(diff_path)
    summary["diff_file"] = {
        "name": os.path.basename(diff_path),
        "md5sum": checksum
    }
    return summary

コード例 #4

0

ファイルを表示

ファイル: cpdb_base.py プロジェクト: SuLab/mygene.info

def _download(__metadata__):
    """
    Download the CPDB pathway/gene tables for human, mouse and yeast.

    For each species the URL is read from ``__metadata__['__url_<species>__']``
    and passed to ``utils.dataload.download`` along with a timestamp-named
    folder (a sibling of DATA_FOLDER) and the file name
    ``CPDB_pathways_genes_<species>.tab``.

    :param __metadata__: mapping holding the ``__url_<species>__`` entries
    """
    from utils.dataload import download as fetch_file

    parent_dir, _ = os.path.split(DATA_FOLDER)
    target_dir = os.path.join(parent_dir, get_timestamp())
    for organism in ('human', 'mouse', 'yeast'):
        source_url = __metadata__['__url_{}__'.format(organism)]
        file_name = 'CPDB_pathways_genes_{}.tab'.format(organism)
        fetch_file(source_url, target_dir, file_name)

コード例 #5

0

ファイルを表示

def _download(__metadata__):
    """
    Retrieve one CPDB pathways file per supported species.

    The destination folder is the parent of DATA_FOLDER joined with the
    current timestamp. Each species' URL comes from the
    ``__url_<species>__`` key of ``__metadata__``.

    :param __metadata__: mapping with ``__url_human__``, ``__url_mouse__``
                         and ``__url_yeast__`` keys
    """
    from utils.dataload import download as _fetch

    destination = os.path.join(os.path.split(DATA_FOLDER)[0], get_timestamp())
    species_names = ['human', 'mouse', 'yeast']
    for name in species_names:
        _fetch(__metadata__['__url_{}__'.format(name)],
               destination,
               'CPDB_pathways_genes_{}.tab'.format(name))

コード例 #6

0

ファイルを表示

ファイル: __init__.py プロジェクト: SuLab/mygene.info

 def switch_collection(self):
     '''Promote the temp collection to the regular collection name.

     If the regular collection already holds documents it is first renamed
     to "<name>_archive_<timestamp>_<random string>" so it is kept as an
     archive. When there is no temp collection, or it is empty, an error
     message is printed and nothing is renamed.
     '''
     staging = self.temp_collection
     if not (staging and staging.count() > 0):
         print("Error: load data first.")
         return

     if self.collection.count() > 0:
         # move the current data out of the way under an archive name
         archive_parts = [self.__collection__, 'archive', get_timestamp(), get_random_string()]
         self.collection.rename('_'.join(archive_parts), dropTarget=True)
     staging.rename(self.__collection__)

コード例 #7

0

ファイルを表示

 def switch_collection(self):
     '''Rename the temp collection to the regular collection name.

     An existing, non-empty regular collection is renamed beforehand to
     "<collection_name>_archive_<timestamp>_<random string>" for archiving.

     Raises ResourceError when no temp collection name is set or the temp
     collection contains no documents.
     '''
     if not self.temp_collection_name or not self.db[self.temp_collection_name].count() > 0:
         raise ResourceError("No temp collection (or it's empty)")

     if self.collection.count() > 0:
         # renaming existing collection so it is kept as an archive
         base_name = self.collection_name
         stamp = get_timestamp()
         token = get_random_string()
         self.collection.rename('_'.join([base_name, 'archive', stamp, token]),
                                dropTarget=True)
     self.db[self.temp_collection_name].rename(self.collection_name)

コード例 #8

0

ファイルを表示

 def switch_collection(self):
     '''Swap the freshly loaded temp collection in as the regular collection.

     A non-empty regular collection is renamed to
     "<name>_archive_<timestamp>_<random string>" before the temp collection
     takes over its name. If the temp collection is missing or empty, an
     error message is printed instead.
     '''
     if not self.temp_collection or not self.temp_collection.count() > 0:
         print("Error: load data first.")
         return

     current = self.collection
     if current.count() > 0:
         # archive the existing collection under a unique name
         name_parts = (self.__collection__, 'archive',
                       get_timestamp(), get_random_string())
         current.rename('_'.join(name_parts), dropTarget=True)
     self.temp_collection.rename(self.__collection__)

コード例 #9

0

ファイルを表示

def backup(folder=".", archive=None):
    """
    Dump the whole hub_db database into the given folder.

    Every document of each internal hub collection is read and stored in one
    dict keyed by collection name, which is then written with ``dumpobj``.
    "archive" can be passed to specify the target file name; otherwise a
    name of the form ``backup_<timestamp>_<random string>.pyobj`` is used.
    Note: this doesn't back up source/merge data, just the internal data
          used by the hub.

    :return: path of the written file (``folder`` joined with the file name)
    """
    # database name of the hub_db internal database; not used below, the
    # lookup is kept so the same calls are made as before
    db_name = get_src_dump().database.name
    collection_getters = (
        get_src_dump, get_src_master, get_src_build, get_src_build_config,
        get_data_plugin, get_api, get_cmd, get_event, get_hub_config,
    )
    snapshot = {}
    for get_collection in collection_getters:
        collection = get_collection()
        snapshot[collection.name] = list(collection.find())
    target = archive or "backup_%s_%s.pyobj" % (get_timestamp(), get_random_string())
    path = os.path.join(folder, target)
    dumpobj(snapshot, path)
    return path

コード例 #10

0

ファイルを表示

ファイル: differ.py プロジェクト: SuLab/biothings.api

def diff_worker_new_vs_old(id_list_new,
                           old_db_col_names,
                           new_db_col_names,
                           batch_num,
                           diff_folder,
                           diff_func,
                           exclude=None,
                           selfcontained=False):
    """
    Compute additions and updates for one batch of ids from the new collection.

    The ids in ``id_list_new`` are looked up in the old backend. Ids with no
    returned document are reported as additions; the ids that were found are
    handed to ``diff_func`` to compute updates. When there is anything to
    report, the diff is dumped to ``<diff_folder>/<batch_num>.pyobj`` and the
    summary gets a ``diff_file`` entry with the file's base name and md5 sum.

    :param id_list_new: ids of the batch taken from the new collection
    :param old_db_col_names: argument for ``create_backend`` (old collection)
    :param new_db_col_names: argument for ``create_backend`` (new collection)
    :param batch_num: batch number, used as the diff file's base name
    :param diff_folder: folder where the diff file is written
    :param diff_func: callable invoked as
                      ``diff_func(old, new, ids, exclude_attrs=exclude)``
    :param exclude: value forwarded as ``exclude_attrs``; defaults to a new
                    empty list for each call
    :param selfcontained: when true, the dumped "add" entry holds the
                          documents fetched from the new backend instead of
                          just their ids
    :return: dict with "add", "update" and "delete" counts (delete is always
             0 here), plus "diff_file" when a diff file was written
    """
    # A fresh list per call: a shared mutable default could leak state
    # between calls if diff_func ever modified it.
    if exclude is None:
        exclude = []
    new = create_backend(new_db_col_names)
    old = create_backend(old_db_col_names)
    docs_common = old.mget_from_ids(id_list_new)
    ids_common = [_doc['_id'] for _doc in docs_common]
    id_in_new = list(set(id_list_new) - set(ids_common))
    _updates = []
    if ids_common:
        _updates = diff_func(old, new, list(ids_common), exclude_attrs=exclude)
    file_name = os.path.join(diff_folder, "%s.pyobj" % str(batch_num))
    _result = {
        'add': id_in_new,
        'update': _updates,
        'delete': [],
        'source': new.target_name,
        'timestamp': get_timestamp()
    }
    if selfcontained:
        # embed the full documents so the diff can be applied on its own
        _result["add"] = new.mget_from_ids(id_in_new)
    summary = {"add": len(id_in_new), "update": len(_updates), "delete": 0}
    if len(_updates) != 0 or len(id_in_new) != 0:
        dump(_result, file_name)
        # compute md5 so when downloaded, users can check integrity
        md5 = md5sum(file_name)
        summary["diff_file"] = {
            "name": os.path.basename(file_name),
            "md5sum": md5
        }

    return summary

コード例 #11

0

ファイルを表示

 def generate_target_name(self, build_config_name):
     '''Return the lowercased name "<build_config_name>_<timestamp>_<random string>".

     build_config_name must not be None (checked with an assert).
     '''
     assert build_config_name is not None
     stamp = get_timestamp()
     token = get_random_string()
     raw_name = '{}_{}_{}'.format(build_config_name, stamp, token)
     return raw_name.lower()

コード例 #12

0

ファイルを表示

 def generate_target_name(self, build_config_name):
     '''Return the lowercased name
     "genedoc_<build_config_name>_<timestamp>_<random string>".
     '''
     template = 'genedoc_{}_{}_{}'
     pieces = (build_config_name, get_timestamp(), get_random_string())
     return template.format(*pieces).lower()

コード例 #13

0

ファイルを表示

ファイル: builder.py プロジェクト: SuLab/mygene.info

 def _get_target_name(self):
     '''Return the lowercased target name
     "genedoc_<build config name>_<timestamp>_<random string>",
     where the config name is read from self._build_config['name'].
     '''
     config_name = self._build_config['name']
     stamp = get_timestamp()
     token = get_random_string()
     return 'genedoc_{}_{}_{}'.format(config_name, stamp, token).lower()

コード例 #14

0

ファイルを表示

ファイル: builder.py プロジェクト: raonyguimaraes/mygene.info

 def _get_target_name(self):
     '''Compose "genedoc_<name>_<timestamp>_<random string>" from the
     'name' entry of self._build_config and return it in lowercase.
     '''
     fields = [
         self._build_config['name'],
         get_timestamp(),
         get_random_string(),
     ]
     target = 'genedoc_{}_{}_{}'.format(*fields)
     return target.lower()