Esempio n. 1
0
def merge(args):
    """Merge several named sample sets into one and print it as JSON.

    Reads the following attributes from ``args``:

    * ``sample_set_file`` and ``sep`` -- forwarded to ``_parse_sample_sets``.
    * ``name`` -- name of the merged set; defaults to ``'merge'``.
    * ``desc`` -- description of the merged set; defaults to ``name``.
    * ``sample_set_names`` -- names of the sample sets to merge, in
      priority order.

    When a key occurs in more than one of the requested sets, the value
    from the first set listed in ``sample_set_names`` is kept.

    Returns 1 (after logging an error) when no set names were given or a
    requested name is not present in the file.  On success the merged set
    is printed to stdout and the function falls through without an
    explicit return value.
    """
    filename = args.sample_set_file
    sep = args.sep
    name = args.name
    desc = args.desc
    sample_set_names = args.sample_set_names
    # check arguments
    if name is None:
        name = 'merge'
    if desc is None:
        desc = name
    if not sample_set_names:
        logging.error('No sample set names specified')
        return 1
    # index the parsed sets by name so requested names can be looked up
    sample_sets = dict((ss.name, ss)
                       for ss in _parse_sample_sets(filename, sep))
    new_value_dict = {}
    for ss_name in sample_set_names:
        if ss_name not in sample_sets:
            logging.error('Sample set name "%s" not found.. Exiting.',
                          ss_name)
            return 1
        for k, v in sample_sets[ss_name].value_dict.items():
            # setdefault keeps the value from the earliest set that
            # mentions this key; later sets never overwrite it
            new_value_dict.setdefault(k, v)
    merged = SampleSet(name, desc, list(new_value_dict.items()))
    # single-argument print(...) behaves the same on Python 2 and 3
    print(merged.to_json())
Esempio n. 2
0
def newcohort(args):
    """Restrict every sample set in a file to the samples of a cohort.

    Reads the following attributes from ``args``:

    * ``sample_set_file`` and ``sep`` -- forwarded to ``_parse_sample_sets``.
    * ``cohort_file`` -- path of a text file; each line, stripped of
      surrounding whitespace, is taken as one cohort sample key.

    For each parsed sample set, only the entries of ``value_dict`` whose
    key is in the cohort are kept.  If at least one kept entry has the
    value 1 the restricted set is printed to stdout as JSON under the
    original name and description; otherwise a warning is logged and the
    set is not printed.
    """
    sample_set_file = args.sample_set_file
    cohort_file = args.cohort_file
    sep = args.sep
    # the context manager closes the cohort file as soon as it is read
    with open(cohort_file) as cohort_fh:
        cohort_samples = set(line.strip() for line in cohort_fh)
    for ss in _parse_sample_sets(sample_set_file, sep):
        new_value_dict = {}
        for k, v in ss.value_dict.items():
            if k in cohort_samples:
                new_value_dict[k] = v
        # a set is only emitted when some cohort sample has the value 1
        if any(v == 1 for v in new_value_dict.values()):
            cohort_ss = SampleSet(ss.name, ss.desc,
                                  list(new_value_dict.items()))
            # single-argument print(...) behaves the same on Python 2 and 3
            print(cohort_ss.to_json())
        else:
            logging.warning('Sample set %s has no hits', ss.name)
Esempio n. 3
0
def subset(args):
    """Build one sample set from "hit" sets and "miss" sets and print it.

    Reads the following attributes from ``args``:

    * ``sample_set_file`` and ``sep`` -- forwarded to ``_parse_sample_sets``.
    * ``name`` -- name of the new set; defaults to ``'subset'``.
    * ``desc`` -- description of the new set; defaults to ``name``.
    * ``hit_sets`` -- names of sets whose members become value 1.
    * ``miss_sets`` -- names of sets whose members become value 0.

    Only keys whose value is 1 in a source set are considered members of
    that set.  Hit sets are applied before miss sets, so a key that is a
    member of both ends up with the value 0.

    Returns 1 (after logging an error) when either list of names is empty
    or a requested name is not present in the file.  On success the new
    set is printed to stdout as JSON and the function falls through
    without an explicit return value.
    """
    filename = args.sample_set_file
    sep = args.sep
    name = args.name
    desc = args.desc
    hit_set_names = args.hit_sets
    miss_set_names = args.miss_sets
    # check arguments
    if name is None:
        name = 'subset'
    if desc is None:
        desc = name
    if not hit_set_names or not miss_set_names:
        logging.error('Sample sets to be considered "hits" or "misses" '
                      'should be specified using --hit and --miss')
        return 1
    # index the parsed sets by name so requested names can be looked up
    sample_sets = dict((ss.name, ss)
                       for ss in _parse_sample_sets(filename, sep))
    new_value_dict = {}
    # order matters: hits first, then misses, so misses override hits
    for set_names, label in ((hit_set_names, 1), (miss_set_names, 0)):
        for ss_name in set_names:
            if ss_name not in sample_sets:
                logging.error('Sample set name "%s" not found.. Exiting.',
                              ss_name)
                return 1
            for k, v in sample_sets[ss_name].value_dict.items():
                if v == 1:
                    new_value_dict[k] = label
    new_ss = SampleSet(name, desc, list(new_value_dict.items()))
    # single-argument print(...) behaves the same on Python 2 and 3
    print(new_ss.to_json())