def merge(args):
    """Merge several named sample sets into one and print it as JSON.

    Reads the sample sets from ``args.sample_set_file`` (parsed by
    ``_parse_sample_sets`` using ``args.sep``), then copies the samples of
    each set listed in ``args.sample_set_names`` into a single mapping.
    When a sample occurs in more than one of the requested sets, the value
    from the first set listed is kept and later values are ignored.

    Args:
        args: namespace with the attributes ``sample_set_file``, ``sep``,
            ``name``, ``desc`` and ``sample_set_names``.  ``name`` defaults
            to ``'merge'`` and ``desc`` defaults to ``name`` when ``None``.

    Returns:
        1 if no sample set names were given or a requested name is not
        present in the file; otherwise ``None`` after printing the merged
        set's ``to_json()`` output.
    """
    filename = args.sample_set_file
    sep = args.sep
    name = args.name
    desc = args.desc
    sample_set_names = args.sample_set_names
    # check arguments
    if name is None:
        name = 'merge'
    if desc is None:
        desc = name
    if not sample_set_names:
        logging.error('No sample set names specified')
        return 1
    sample_sets = dict((ss.name, ss)
                       for ss in _parse_sample_sets(filename, sep))
    merged = {}
    for ss_name in sample_set_names:
        if ss_name not in sample_sets:
            logging.error('Sample set name "%s" not found.. Exiting.',
                          ss_name)
            return 1
        for sample, value in sample_sets[ss_name].value_dict.items():
            # first occurrence wins: setdefault leaves an existing entry
            # untouched, so earlier sets take precedence over later ones
            merged.setdefault(sample, value)
    merged_set = SampleSet(name, desc, list(merged.items()))
    # parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3
    print(merged_set.to_json())
def newcohort(args):
    """Restrict every sample set to the samples of a cohort and print it.

    The cohort is read from ``args.cohort_file``, one sample name per line
    (surrounding whitespace stripped).  Each sample set parsed from
    ``args.sample_set_file`` is reduced to the samples that appear in the
    cohort.  A reduced set is printed via ``to_json()`` only when at least
    one retained sample has the value ``1``; otherwise a warning is logged
    and nothing is printed for that set.

    Args:
        args: namespace with the attributes ``sample_set_file``,
            ``cohort_file`` and ``sep``.

    Returns:
        None.
    """
    sample_set_file = args.sample_set_file
    cohort_file = args.cohort_file
    sep = args.sep
    # the context manager closes the cohort file as soon as it is read
    # instead of leaving the handle open until garbage collection
    with open(cohort_file) as fileh:
        cohort_samples = set(line.strip() for line in fileh)
    for ss in _parse_sample_sets(sample_set_file, sep):
        new_value_dict = {}
        hits = 0
        for k, v in ss.value_dict.items():
            if k not in cohort_samples:
                continue
            if v == 1:
                hits += 1
            # NOTE(review): every cohort sample is kept, whatever its
            # value; only value-1 samples count as hits -- confirm this
            # matches the intended nesting
            new_value_dict[k] = v
        if hits > 0:
            cohort_ss = SampleSet(ss.name, ss.desc,
                                  list(new_value_dict.items()))
            # parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3
            print(cohort_ss.to_json())
        else:
            logging.warning('Sample set %s has no hits', ss.name)
def subset(args):
    """Build a new sample set from chosen "hit" and "miss" sets and print it.

    Reads the sample sets from ``args.sample_set_file`` (parsed by
    ``_parse_sample_sets`` using ``args.sep``).  Every sample with value
    ``1`` in one of the sets named in ``args.hit_sets`` is given the value
    ``1`` in the result; every sample with value ``1`` in one of the sets
    named in ``args.miss_sets`` is given the value ``0``.  Miss sets are
    processed after hit sets, so a sample present in both ends up ``0``.

    Args:
        args: namespace with the attributes ``sample_set_file``, ``sep``,
            ``name``, ``desc``, ``hit_sets`` and ``miss_sets``.  ``name``
            defaults to ``'subset'`` and ``desc`` defaults to ``name``
            when ``None``.

    Returns:
        1 if either list of set names is empty or a requested name is not
        present in the file; otherwise ``None`` after printing the new
        set's ``to_json()`` output.
    """
    filename = args.sample_set_file
    sep = args.sep
    name = args.name
    desc = args.desc
    hit_set_names = args.hit_sets
    miss_set_names = args.miss_sets
    # check arguments
    if name is None:
        name = 'subset'
    if desc is None:
        desc = name
    if not hit_set_names or not miss_set_names:
        logging.error('Sample sets to be considered "hits" or "misses" '
                      'should be specified using --hit and --miss')
        return 1
    sample_sets = dict((ss.name, ss)
                       for ss in _parse_sample_sets(filename, sep))
    new_value_dict = {}
    # hits are applied first and misses second, so a sample that is a
    # member of both a hit set and a miss set is recorded as a miss
    for set_names, flag in ((hit_set_names, 1), (miss_set_names, 0)):
        for ss_name in set_names:
            if ss_name not in sample_sets:
                logging.error('Sample set name "%s" not found.. Exiting.',
                              ss_name)
                return 1
            for sample, value in sample_sets[ss_name].value_dict.items():
                # only samples with value 1 in the source set are carried over
                if value == 1:
                    new_value_dict[sample] = flag
    new_set = SampleSet(name, desc, list(new_value_dict.items()))
    # parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3
    print(new_set.to_json())