def worker_task(annotation_store_path, match_ions_store_path, counter_store_path,
                data_queue, comm_line):
    annotation_store = sqlitedict.open(annotation_store_path, journal_mode="OFF")
    match_ions_store = sqlitedict.open(match_ions_store_path, journal_mode="OFF")
    did_match_counter = sqlitedict.open(counter_store_path, journal_mode="OFF")
    cntr = 0
    running = True
    while running:
        has_signal = comm_line.poll(0.01)
        if has_signal:
            signal = comm_line.recv()
            logger.debug("Signal found: %r", signal)
            if signal == HALT:
                running = False
        logger.debug("Processing Queue %d", cntr)
        while not data_queue.empty():
            try:
                matches, counter, annotater = data_queue.get(timeout=30)
                for m in matches:
                    match_ions_store[cntr] = m
                    cntr += 1
                increment_counter(did_match_counter, counter)
                combine_annotations(annotation_store, annotater)
            except QueueEmptyError:
                break
    annotation_store.commit()
    match_ions_store.commit()
    did_match_counter.commit()
    return did_match_counter
def take_best_predictions_spectra(predictions, spectra_db):
    threshold_predictions = predictions.kvquery()
    collector = sqlitedict.open("./spectra_counting.db", tablename="spectra_map")
    for ix, pred in threshold_predictions.iterrows():
        glycopeptide_ident = pred.Glycopeptide_identifier
        matched_spectra = [spectra_db[scan_id] for scan_id in pred.scan_id_range]
        collector[ix] = {"glycopeptide": glycopeptide_ident, "spectra": matched_spectra}
    collector.commit()
    return collector
def test_terminate_instead_close(self):
    ''' call terminate() instead of close(): the backing file should be removed '''
    d = sqlitedict.open('tests/db/sqlitedict-terminate.sqlite')
    d['abc'] = 'def'
    d.commit()
    self.assertEqual(d['abc'], 'def')
    d.terminate()
    self.assertFalse(os.path.isfile('tests/db/sqlitedict-terminate.sqlite'))
def taskmain(predictions_path, n_processes=4, prefix_len=0, suffix_len=0, out=None):
    decoy_path = (predictions_path if out is None else out).rsplit("scored", 1)[0] + "decoy.db"
    predictions_metadata = sqlitedict.open(predictions_path, "metadata")
    metadata = dict(predictions_metadata.items())
    metadata["tag"] = (metadata["tag"] if metadata["tag"] not in ["", None] else "") + "decoy"
    metadata["decoy_ratio"] = 1
    metadata_table = sqlitedict.open(decoy_path, 'metadata')
    metadata_table.update(metadata)
    metadata_table.commit()
    metadata_table.close()
    logger.info("Reading sequence space from %r", predictions_path)
    theoretical_sequences = sqlitedict.open(predictions_path, "theoretical_search_space")
    decoy_table = sqlitedict.open(decoy_path, "theoretical_search_space", journal_mode="OFF")
    logger.info("Writing decoys to %r", decoy_path)
    cntr = 0
    task_fn = functools.partial(make_decoy, prefix_len=prefix_len, suffix_len=suffix_len)
    if n_processes > 1:
        worker_pool = multiprocessing.Pool(n_processes)
        for decoy in worker_pool.imap_unordered(task_fn, theoretical_sequences.itervalues(),
                                                chunksize=500):
            decoy_table[cntr] = decoy
            cntr += 1
            if cntr % 10000 == 0:
                logger.info("Processed %d decoys", cntr)
                decoy_table.commit()
        worker_pool.terminate()
        worker_pool.close()
    else:
        for decoy in itertools.imap(task_fn, theoretical_sequences.itervalues()):
            decoy_table[cntr] = decoy
            cntr += 1
            if cntr % 10000 == 0:
                logger.info("Processed %d decoys", cntr)
                decoy_table.commit()
    decoy_table.commit()
    print len(decoy_table), len(theoretical_sequences)
    if len(decoy_table) == 0:
        raise Exception("No decoys generated")
    logger.info("Decoy creation complete.")
    return decoy_path
def __init__(self, db_path: str,
             default_snips_nlu_config: dict = snips_nlu.default_configs.CONFIG_EN):  # noqa
    """Initialize the store.

    Args:
        db_path: Path to which the database should be persisted.
        default_snips_nlu_config: Configuration passed to SnipsNLUEngine.
    """
    self.db_path = Path(db_path)
    if self.db_path.exists():
        logging.getLogger().info(f"'{db_path}' already exists; using that")
    self.__db = sqlitedict.open(str(self.db_path), autocommit=True)
    self.default_snips_nlu_config = default_snips_nlu_config
    logging.getLogger().info(
        f"Initialized {self.__class__.__name__} with path '{self.db_path}'")
def build_database(data_path, db_path=None):
    if db_path is None:
        db_path = "./glycomedb.db"
    files = os.listdir(data_path)
    db = sqlitedict.open(db_path)
    total = float(len(files))
    for i, f in enumerate(files):
        if i % 10 == 0:
            print("%f%%" % (i / total * 100))
        if f[-3:] != 'txt':
            continue
        try:
            key, record = prepare_file(f)
            print(record)
            db[key] = record
        except (glycoct.GlycoCTSectionUnsupported, glycoct.GlycoCTError) as e:
            print(e)
def main():
    """ main """
    global VERBOSE_LEVEL
    cwd = os.path.basename(os.getcwdu())
    cwd_search = search_by_dir(cwd)

    parser = argparse.ArgumentParser(description="search on discogs.com")
    parser.add_argument('search', help='search string', nargs='*')
    parser.add_argument('-a', '--as_dir', help='add current dir for search', action="store_true")
    parser.add_argument('-A', '--as_dir_log', help='add current dir for search & output to .lst', action="store_true")
    parser.add_argument('-l', '--log', help='save stdout to log-file, use "." for as-dir', type=str, action="store")
    parser.add_argument('-t', '--tag_cue', help='tag .cue for one release', type=str, action="store")
    parser.add_argument('-s', '--tag_skip', help='skip some tracks', type=int, action="store", default=0)
    parser.add_argument('-p', '--param', help='search parameter: <name>=<value> (ex. type=artist)', action='append')
    parser.add_argument('-d', '--deep', help='deep search, "-dd" for labels', action='count', default=0)
    parser.add_argument('-i', '--id', help='client id [%r]' % CLIENT_ID, default=CLIENT_ID)
    parser.add_argument('-q', '--quality', help='search quality', type=float, default=0.1)
    parser.add_argument('-J', '--make_json', help='make .json', action='store_true')
    parser.add_argument('-R', '--raw_print', help='print raw data (for debug)', action='store_true')
    parser.add_argument('-P', '--print_release', help='print formatted release info', action='store_true')
    parser.add_argument('-I', '--object_id', help='search by id, default for "type=release"', action='store_true')
    parser.add_argument('-c', '--cache', help='use cache [%r]' % CACHE_USE, action='store_true', default=CACHE_USE)
    parser.add_argument('-C', '--cache_db', help='cache name [%r]' % CACHE_STORAGE, default=CACHE_STORAGE)
    parser.add_argument('-U', '--update_db', help='update cache, "-UU" for full update', action='count', default=0)
    parser.add_argument('-T', '--table_db', help='table name for cache [%r]' % CACHE_TABLE, default=CACHE_TABLE)
    parser.add_argument('-v', '--verbose', help='increase output verbosity, "-vv" for more', action="count", default=0)
    parser.add_argument('--timeout', help='maximum timeout for network operation(s)', type=float, default=120.0)
    parser.add_argument('--images', help='load release image(s)', action="store_true")
    parser.add_argument('--all_images', help='load all image(s)', action="store_true")
    parser.add_argument('--user_token', help='user token [%r]' % USER_TOKEN, default=USER_TOKEN)
    parser.add_argument('--user_secret', help='user secret [%r]' % USER_SECRET, default=USER_SECRET)
    args = parser.parse_args()

    # global param
    VERBOSE_LEVEL = args.verbose
    logger("{%r}" % args, level=2)

    if args.quality < 0.0:
        args.quality = 0.1
    if args.quality > 1.0:
        args.quality = 1.0

    if args.as_dir_log:
        args.as_dir = True
        args.log = cwd + ".lst"

    if args.log:
        if args.log == ".":
            args.log = cwd + ".lst"
        # need unicode log-name
        name = args.log
        if isinstance(name, str):
            name = name.decode(DEFAULT_ENCODING)
        logger("info: stdout => {%s}" % name)
        log = open(name, "w")
        sys.stdout = log

    if args.as_dir:
        args.search.append(cwd_search)
        logger("info: dir-search for {%s} => {%s}" % (cwd, cwd_search))
        # args.search.append(os.getcwdu())

    dc = discogs_client.Client(args.id)
    dc.set_consumer_key(CONSUMER_KEY, CONSUMER_SECRET)
    if not (args.user_token and args.user_secret):
        logger("authorization (discogs.com):")
        access_token, access_secret, authorize_url = dc.get_authorize_url()
        logger("access_token{%s} access_secret{%s}" % (access_token, access_secret))
        logger("authorize_url{%s}" % authorize_url)
        verifier = raw_input("verifier> ")
        token, secret = dc.get_access_token(verifier)
        logger("user token{%s}" % token)
        logger("user secret{%s}" % secret)
        return None
    else:
        dc.set_token(args.user_token, args.user_secret)

    dc_fetcher = getattr(dc, "_fetcher", None)

    # setup verbose
    if VERBOSE_LEVEL > 1:
        dc.verbose = True

    # setup cache
    if args.cache:
        logger("cache: db{%r}, table{%r}, update{%r}" % (args.cache_db, args.table_db, args.update_db), level=2)
        db = sqlitedict.open(filename=args.cache_db, tablename=args.table_db)
        cache = CacheFetcher(dc_fetcher, db, args.update_db, commit_max=CACHE_SYNC, compression=CACHE_COMPRESS)
        dc._fetcher = cache
    else:
        logger("cache: internal (temporary)")
        cache = CacheFetcherDict(dc_fetcher)
        dc._fetcher = cache

    # parse params
    p = {}
    if args.param:
        for params in args.param:
            for param in params.split(","):
                name, _, value = param.partition("=")
                name = name.strip()
                value = value.strip()
                if not (name or value):
                    continue
                if not value:
                    value = name
                    name = "type"
                p[name.strip()] = value.strip()

    s_type = p.get("type", "release")
    s_meth = getattr(dc, s_type, None)

    # all is unicode now (must be)
    logger("search [1]: %s" % args.search, level=1)
    args.search = [cuelib.as_unicode(s, DEFAULT_ENCODING, True)[0] for s in args.search]
    logger("search [2]: %s" % args.search, level=1)

    # load info from db
    rt = []
    tt = time.time()
    deep_data = None
    for search in args.search:
        # auto-detect: .cue & folder
        if search.lower().endswith(".cue") and os.path.isfile(search):
            old_search = search
            search = search_by_cue(old_search)
            logger("info: cue-search for {%s} => {%s}" % (old_search, search))
        elif os.path.isdir(search):
            old_search = search
            search = search_by_dir(os.path.basename(old_search))
            logger("info: dir-search for {%s} => {%s}" % (old_search, search))
        elif search.startswith("@") and os.path.isfile(search[1:]):
            old_search = search[1:]
            search = search_by_file(old_search)
            logger("info: file-search for {%s} => {%s}" % (old_search, search))

        search = search.lower()
        a_search = filter(None, search.replace(".", " ").replace(",", " ").replace("&", " ").split(" "))
        a_search = [s.strip() for s in a_search]
        search = ' '.join(a_search)
        logger("info: search for {%s}" % a_search, level=1)

        # warning
        # if len(a_search) == 1:
        #     if args.quality != 1.0:
        #         logger("warning: use quality setting as 1.0 for better result")

        if search.isdigit() and args.object_id:
            object_id = int(search)
            if callable(s_meth):
                o = s_meth(object_id)
                sr = [o]
                url = o.url
                # ignore "quality" setting
                args.quality = 0
            else:
                sr = []
                url = ""
        else:
            if isinstance(search, unicode):
                searchu = search
            else:
                searchu = unicode(search, DEFAULT_ENCODING)
            # search8 = searchu.encode('utf-8')
            sq = dc.search(searchu, **p)
            url = sq.url
            logger("search{%s} url{%s}" % (search, sq.url), level=1)
            sr = load_result(sq, a_search, args.quality, timeout=args.timeout)

        logger("for %r found %s item(s)" % (search, len(sr)), level=1)
        sr = [x for x in sr if find_words(get_name_or_title(x), a_search)[-1] >= args.quality]
        logger("after filter{%r} found %s item(s)" % (args.quality, len(sr)), level=1)

        # deep loading
        deep_data = {}
        if sr:
            for o in sr:
                if args.raw_print:
                    print_raw(o)
                ok = False
                if isinstance(o, Artist) and args.deep >= 1:
                    ok = True
                elif isinstance(o, Label) and args.deep > 1:
                    ok = True
                elif isinstance(o, Master):
                    ok = True
                if ok:
                    deep_result(o, deep_data, a_search, args.quality, args.timeout)

            # add found objects from deep loading (object => list of releases)
            for o in deep_data:
                if o not in sr:
                    sr.append(o)

            logger("deep{%s}" % repr(deep_data), level=2)

        if sr:
            rt.append((search, url, sort_result(sr), deep_data))
            logger("items{%s}" % repr(rt[-1]), level=2)

    # collect all releases, artists, labels
    releases = []
    artists = []
    labels = []
    for _search, _url, sr, dd in rt:
        releases.extend(sr[Release])
        for o_type in (Label, Artist, Master):
            for o in sr[o_type]:
                releases.extend(dd.get(o, []))
        artists.extend(sr[Artist])
        labels.extend(sr[Label])
        logger("%r: labels: %r, artists: %r, releases: %r" % (_search, len(labels), len(artists), len(releases)))

    if cache:
        cache.commit()

    if len(releases) == 1:
        args.print_release = True

    # total time
    tt = time.time() - tt
    logger()
    s = "time {%.4f}, search {%s}" % (tt, ' | '.join(args.search))
    logger(s, level=0)
    logger("~" * len(s), level=0)
    logger()

    dump = ((Label, labels, "%10d | %s"),
            (Artist, artists, "%10d | %s {%s}"),
            (Release, releases, "%10d | %s - %s (%s) @ %s # %s <%s>"))
    for t, data, s in dump:
        d = []
        _ = [obj_dump(o, d) for o in data]
        d = obj_sort(t, d)
        if not d:
            continue
        ids = {}
        for x in d:
            _id = x[0]
            if _id not in ids:
                logger(s % x)
                ids[_id] = True
        logger()
        logger("%r: found %d item(s)" % (t.__name__, len(ids)))
        logger()

    if args.print_release:
        head_fmt = (("id", "%d"), ("title", "%s - %s (%s)"), ("format", "%s"), ("label", "%s"), ("genre", "%s"))
        head = '\n'.join(["%-6s / %s" % x for x in head_fmt])
        for o in releases:
            print_release(o, head=head)
            logger()

    if args.make_json:
        for o in releases:
            make_json(o)

    # try to tag .cue
    if args.tag_cue:
        _, cue_name = cue_tagger(releases, args.tag_cue, args.tag_skip)
        logger("tag: '%s'" % cue_name)

    # load release image(s)
    if args.images:
        for o in releases:
            load_images(o)

    return rt, deep_data
def sync_matches_db(self, other_db_file, table_name="matched_ions"):
    other_db = sqlitedict.open(other_db_file, table_name, journal_mode="OFF")
    matches = sqlitedict.open(self.match_ions_store)
    for k, v in matches.iteritems():
        other_db[k] = v
def open_counter_store(self):
    return sqlitedict.open(self.counter_store)
def open_match_ions_store(self):
    return sqlitedict.open(self.match_ions_store)
def open_annotation_store(self):
    return sqlitedict.open(self.annotation_store)
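The snippets above all lean on the same small sqlitedict surface: open a SQLite-backed dict, write pickled values, then commit and close (or terminate) it. A minimal, self-contained sketch of that call pattern follows; the file path, table name, and key below are illustrative, not taken from any of the quoted projects.

import sqlitedict

# Open (or create) a SQLite-backed dict. The second positional argument used in
# the snippets above is the table name; journal_mode="OFF" trades crash safety
# for write speed, and autocommit=False buffers writes until commit() is called.
db = sqlitedict.open("/tmp/sqlitedict-example.sqlite",   # illustrative path
                     tablename="matched_ions",
                     journal_mode="OFF",
                     autocommit=False)

db["scan_1"] = {"glycopeptide": "EXAMPLE", "score": 0.9}  # values are pickled
db.commit()   # persist buffered writes
db.close()    # keep the file; terminate() would delete it instead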