def ns(stream, cnt):
    """Yield a marker-definition record for every not-yet-loaded row.

    Rows whose ``'label'`` is already in ``known_markers`` are skipped
    with a warning.  For the remaining rows a dict is assembled from
    values taken from the enclosing scope plus the row's ``'label'`` and
    ``'rs_label'``; its ``'mask'`` is the result of ``convert_to_top``
    applied to the row's mask.

    When ``convert_to_top`` raises ``ValueError`` the original mask is
    kept unchanged, a warning is logged and ``cnt["bad"]`` is
    incremented; otherwise ``cnt["good"]`` is incremented.  ``cnt`` is
    therefore mutated in place.

    The label is added to ``known_markers`` only once the consumer
    resumes the generator after receiving the record.
    """
    for row in stream:
        label = row['label']
        if label not in known_markers:
            record = dict(
                source=source,
                context=context,
                release=release,
                label=row['label'],
                rs_label=row['rs_label'],
                ref_rs_genome=ref_genome,
                dbSNP_build=dbsnp_build,
            )
            try:
                record['mask'] = convert_to_top(row['mask'])
            except ValueError:
                # Fall back to the mask exactly as supplied.
                record['mask'] = row['mask']
                self.logger.warn(
                    '%s: could not convert mask to top' % row['label']
                )
                cnt["bad"] += 1
            else:
                cnt["good"] += 1
            yield record
            # Runs when the consumer asks for the next record.
            known_markers.add(label)
        else:
            self.logger.warn('%s: already loaded' % row['label'])
def generator(mdefs):
    """Yield one marker-definition record per entry of ``mdefs``.

    Each entry is read positionally: index 0 is the label, index 1 the
    rs label and index 2 the mask, which is passed through
    ``convert_to_top``.  The remaining fields are taken from the
    enclosing scope.  Nothing is caught here, so any exception raised
    by ``convert_to_top`` propagates to the consumer.
    """
    for mdef in mdefs:
        record = dict(
            source=source,
            context=context,
            release=release,
            ref_rs_genome=ref_rs_genome,
            dbSNP_build=dbsnp_build,
            label=mdef[0],
            rs_label=mdef[1],
            mask=convert_to_top(mdef[2]),
        )
        yield record
def generator(mdefs):
    """Turn positional marker definitions into record dicts, lazily.

    For every item of ``mdefs`` a dict is yielded that combines the
    fixed fields found in the enclosing scope with the item's label
    (index 0), rs label (index 1) and mask (index 2).  The mask is run
    through ``convert_to_top`` first; exceptions from that call are not
    handled here.
    """
    for entry in mdefs:
        fields = {}
        fields['source'] = source
        fields['context'] = context
        fields['release'] = release
        fields['ref_rs_genome'] = ref_rs_genome
        fields['dbSNP_build'] = dbsnp_build
        fields['label'] = entry[0]
        fields['rs_label'] = entry[1]
        fields['mask'] = convert_to_top(entry[2])
        yield fields
# Write 'marker_definitions.tsv' with N randomly generated marker
# definitions (columns: label, rs_label, mask).  N is read from the first
# command-line argument and defaults to 100 when no argument is given; a
# non-integer argument still raises ValueError.
try:
    N = int(sys.argv[1])
except IndexError:
    N = 100
FLANK_SIZE = 20  # number of random bases on each side of the alleles

with open('marker_definitions.tsv', 'w') as f:
    tsv = csv.DictWriter(f, fieldnames=['label', 'rs_label', 'mask'],
                         delimiter='\t', lineterminator="\n")
    tsv.writeheader()
    j = 0
    while j < N:
        # range() and a generator expression behave the same on Python 2
        # and 3 and do not leak a loop variable into module scope.
        lflank = ''.join(random.choice('ACGT') for _ in range(FLANK_SIZE))
        rflank = ''.join(random.choice('ACGT') for _ in range(FLANK_SIZE))
        alleles = '/'.join(random.sample('ACGT', 2))
        mask = '%s[%s]%s' % (lflank, alleles, rflank)
        try:
            # Only used as a validity check; the result is discarded.
            convert_to_top(mask)
        except ValueError:
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('Bad mask, skipping')
            continue  # j is not advanced, so a replacement is generated
        j += 1
        # Time-derived component of the generated labels.
        t = time.time() % 1000000
        y = {
            'label': 'foo-%d-%d' % (t, j),
            'rs_label': 'rs-foo-%d-%d' % (t, j),
            'mask': mask,
        }
        tsv.writerow(y)
# Write 'marker_definitions.tsv' with N randomly generated marker
# definitions (columns: label, rs_label, mask).  N is read from the first
# command-line argument and defaults to 100 when no argument is given; a
# non-integer argument still raises ValueError.
#
# The `try:` below was missing, which left the `except IndexError:` clause
# orphaned (a syntax error).
try:
    N = int(sys.argv[1])
except IndexError:
    N = 100
FLANK_SIZE = 20  # number of random bases on each side of the alleles

with open('marker_definitions.tsv', 'w') as f:
    tsv = csv.DictWriter(f, fieldnames=['label', 'rs_label', 'mask'],
                         delimiter='\t', lineterminator="\n")
    tsv.writeheader()
    j = 0
    while j < N:
        # range() and a generator expression behave the same on Python 2
        # and 3 and do not leak a loop variable into module scope.
        lflank = ''.join(random.choice('ACGT') for _ in range(FLANK_SIZE))
        rflank = ''.join(random.choice('ACGT') for _ in range(FLANK_SIZE))
        alleles = '/'.join(random.sample('ACGT', 2))
        mask = '%s[%s]%s' % (lflank, alleles, rflank)
        try:
            # Only used as a validity check; the result is discarded.
            convert_to_top(mask)
        except ValueError:
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('Bad mask, skipping')
            continue  # j is not advanced, so a replacement is generated
        j += 1
        # Time-derived component of the generated labels.
        t = time.time() % 1000000
        y = {
            'label': 'foo-%d-%d' % (t, j),
            'rs_label': 'rs-foo-%d-%d' % (t, j),
            'mask': mask,
        }
        tsv.writerow(y)
def test_convert_to_top_split(self):
    """Check ``convert_to_top`` on masks that were split beforehand.

    Both members of every ``CONVERT_PAIRS`` entry are upper-cased and
    passed through ``usnp.split_mask``; converting the first (with
    ``toupper=False``) must give the second.  The failure message
    carries the 1-based pair number.
    """
    for pair_no, (raw, top) in enumerate(CONVERT_PAIRS, 1):
        upper_pair = (raw.upper(), top.upper())
        split_raw, split_top = [usnp.split_mask(m) for m in upper_pair]
        converted = usnp.convert_to_top(split_raw, toupper=False)
        failure_msg = '(%d): %r != %r' % (pair_no, split_raw, split_top)
        self.assertEqual(converted, split_top, failure_msg)
def test_convert_to_top(self):
    """Check ``convert_to_top`` against every ``CONVERT_PAIRS`` entry.

    Converting the first member of each pair must give the second.  The
    failure message carries the 1-based pair number.
    """
    for pair_no, (raw, expected) in enumerate(CONVERT_PAIRS, 1):
        converted = usnp.convert_to_top(raw)
        failure_msg = '(%d): %r != %r' % (pair_no, raw, expected)
        self.assertEqual(converted, expected, failure_msg)