def get_systems_by_name(self, sysnames, keep_data=False):
    """Resolve several system names in a single call.

    Each entry that ``util.parse_coords`` accepts is turned directly into a
    ``system.System`` built from the parsed coordinates and name.  All other
    entries are sent to the backend in one ``get_systems_by_name`` query and
    matched back to the input by lower-cased name.

    Args:
        sysnames: Names (or coordinate strings) to resolve.  It is walked
            twice, so it must be re-iterable.
        keep_data: Passed through to ``_make_known_system`` for every
            backend row.

    Returns:
        A ``collections.OrderedDict`` keyed by the entries of ``sysnames`` in
        input order; the value is the resolved system, or ``None`` when the
        entry matched neither the backend nor the coordinate format.
    """
    coord_systems = {}
    lookup_names = []

    # First pass: split coordinate-style entries from names needing a lookup.
    for entry in sysnames:
        parsed = util.parse_coords(entry)
        if parsed is None:
            lookup_names.append(entry)
            continue
        x, y, z, label = parsed
        coord_systems[entry] = system.System(x, y, z, label)

    # One backend round trip for everything that was not a coordinate string.
    known_by_lower = {}
    if any(lookup_names):
        rows = self._backend.get_systems_by_name(lookup_names)
        known_systems = [_make_known_system(row, keep_data) for row in rows]
        for known in known_systems:
            known_by_lower[known.name.lower()] = known

    # Second pass: backend matches take precedence over coordinate systems.
    resolved = collections.OrderedDict()
    for entry in sysnames:
        lowered = entry.lower()
        if lowered in known_by_lower:
            resolved[entry] = known_by_lower[lowered]
        else:
            resolved[entry] = coord_systems.get(entry)
    return resolved
def main():
    """Command-line entry point: plot one CNE together with its valid range.

    Parses the command line, loads the CNE table from ``--file`` via
    ``parse_dat(read_fields(...))``, derives a (start, end) offset pair for
    every row of ``--extra_data`` (valid coordinates relative to the start of
    the coordinates in the fifth field), and hands the entries for the
    requested identifier to ``plot_cne``.
    """
    import argparse

    # (flags, keyword settings) for every command-line argument, in order.
    argument_specs = (
        (('identifier',),
         dict(help='Identifier name e.g. cne.100899.FST.')),
        (('-f', '--file'),
         dict(nargs='?', type=argparse.FileType('r'), default=sys.stdin,
              help='Input file e.g. d2z.dat.')),
        (('--extra_data',),
         dict(type=argparse.FileType('r'), default='hg18.toDanRer5.seqs.txt',
              help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')),
        (('--valid',),
         dict(type=int, default=9,
              help='Field number of valid test coordinates.')),
    )
    parser = argparse.ArgumentParser()
    for flags, settings in argument_specs:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    cne_table = parse_dat(read_fields(f=args.file))
    extra_rows = read_fields(f=args.extra_data)

    valid_ranges = {}
    for fields in extra_rows:
        danrer = parse_coords(fields[4])
        # --valid is a 1-based field number.
        valid = parse_coords(fields[args.valid - 1])
        valid_ranges[fields[0]] = tuple(
            valid[edge] - danrer['start'] for edge in ('start', 'end')
        )

    wanted = args.identifier
    plot_cne(wanted, cne_table[wanted], valid_ranges[wanted])
def get_system_by_name(self, sysname, keep_data=False):
    """Resolve a single system name.

    The input is first checked against the "fake" system format of
    "[123.4,56.7,-89.0]" via ``util.parse_coords``; on a match a
    ``system.System`` is built straight from the parsed values.  Otherwise
    the backend is queried by name.

    Args:
        sysname: System name or coordinate string.
        keep_data: Passed through to ``_make_known_system``.

    Returns:
        The resolved system, or ``None`` when the backend has no match.
    """
    parsed = util.parse_coords(sysname)
    if parsed is not None:
        x, y, z, label = parsed
        return system.System(x, y, z, label)

    record = self._backend.get_system_by_name(sysname)
    if record is None:
        return None
    return _make_known_system(record, keep_data)
def row_search(OPTS, m, line_tups):
    """Scan every row with the model and write the scores to a pickle file.

    For each row the model is fitted on field ``OPTS.a`` and then used to
    scan field ``OPTS.b`` (both are 1-based field numbers).  The pickled
    object is a dictionary with the row name (first field) as key and, as
    value, the list of scores returned by the scan for that row.

    Args:
        OPTS: Options object providing ``a``, ``b``, ``c`` (1-based field
            numbers) and ``model`` (output file name without ``.pkl``).
        m: Model object exposing ``fit`` and ``scan``.
        line_tups: Sized sequence of rows (``len`` is taken for progress
            reporting).

    Returns:
        None.  The result is written to ``OPTS.model + '.pkl'``.
    """
    rows = {}
    for i, l in enumerate(line_tups):
        name, a, b = l[0], l[OPTS.a-1], l[OPTS.b-1]
        # The parsed coordinates are not used below; the call is kept so a
        # malformed coordinate field still fails exactly as it did before.
        parse_coords(l[OPTS.c-1])
        m.fit([a])
        # scan() is given a one-element list; take the result for that item.
        hits = m.scan([b], n=None, reverse_complement=True, sort=False)[0]
        rows[name] = [score for index, score in hits]
        progress(50, (float(i)+1)/len(line_tups)*100, pre="Processing genes")

    pkl_filename = OPTS.model + '.pkl'
    sys.stdout.write("Writing pickle file %s. Do not exit!\n" % pkl_filename)
    # Context manager guarantees the file is flushed and closed, even if
    # pickling raises part-way through.
    with open(pkl_filename, 'wb') as pkl:
        pickle.dump(rows, pkl, pickle.HIGHEST_PROTOCOL)