Example #1
0
def sample_data_from_DB(sampleids, mousedb="Hoekstra lab mouse database"):
    """Collect pedigree data for ``sampleids`` from the mouse table.

    The table is loaded with
    ``preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)`` and is
    treated as a sequence of dict records that is iterated several times.

    Args:
        sampleids: Container of record ids to look up (only used for
            ``in`` membership tests).
        mousedb: Name of the table passed to the loader.

    Returns:
        A 4-tuple ``(ped, recombinants, parents, parents_spp)``:

        * ``ped`` -- dict mapping id -> ``(damid, sireid)`` for every
          requested sample, plus every dam/sire of those samples, that has
          both a ``"damid"`` and a ``"sireid"`` field.
        * ``recombinants`` -- ids present in ``ped`` whose ``"damstrain"``
          and ``"sirestrain"`` both contain a comma (presumably marking a
          mixed-strain parent -- confirm against the table conventions).
        * ``parents`` -- de-duplicated list (arbitrary order) of the
          grandparent ids of the recombinants.
        * ``parents_spp`` -- dict mapping each id in ``parents`` found in
          the table to its ``"damstrain"`` value.

    Raises:
        KeyError: If a record selected for the strain checks lacks
            ``"damstrain"``/``"sirestrain"``, or if a parent of a
            recombinant has no entry in ``ped``.
    """
    td = preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)

    def pedigree_for(wanted):
        # ``key in d`` replaces dict.has_key(), which only exists on
        # Python 2; the ``in`` operator behaves the same on 2 and 3.
        return {
            d["id"]: (d["damid"], d["sireid"])
            for d in td
            if d.get("id", "") in wanted and "sireid" in d and "damid" in d
        }

    ped = pedigree_for(sampleids)

    # Flatten the (dam, sire) pairs into a set.  The previous
    # ``reduce(lambda x, y: x + y, ...)`` raised TypeError when no sample
    # matched (empty sequence, no initializer) and made every later
    # membership test a linear scan over a tuple.
    parent_ids = set(pid for pair in ped.values() for pid in pair)
    ped.update(pedigree_for(parent_ids))

    recombinants = [
        d["id"]
        for d in td
        if d.get("id", "") in ped and "," in d["damstrain"] and "," in d["sirestrain"]
    ]

    # Walk two generations up from each recombinant: its parents (f1) and
    # then their parents (g0).
    grandparents = set()
    for f2 in recombinants:
        for f1 in ped[f2]:
            grandparents.update(ped[f1])
    parents = list(grandparents)

    parents_spp = {d["id"]: d["damstrain"] for d in td if d.get("id", "") in grandparents}

    return ped, recombinants, parents, parents_spp
Example #2
0
def sample_data_from_DB(sampleids, mousedb='Hoekstra lab mouse database'):
    '''Collect pedigree data for ``sampleids`` from the mouse table.

    The table comes from
    ``preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)`` and is
    treated as a sequence of dict records that is scanned several times.

    Args:
        sampleids: Container of record ids to look up (only used for
            ``in`` membership tests).
        mousedb: Name of the table passed to the loader.

    Returns:
        Tuple ``(ped, recombinants, parents, parents_spp)`` where

        * ``ped`` maps id -> ``(damid, sireid)`` for the requested samples
          and for their dams/sires, restricted to records carrying both a
          ``'damid'`` and a ``'sireid'`` field;
        * ``recombinants`` lists the ids in ``ped`` whose ``'damstrain'``
          and ``'sirestrain'`` both contain a comma (presumably a
          mixed-strain marker -- confirm against the table conventions);
        * ``parents`` is the de-duplicated list (arbitrary order) of the
          recombinants' grandparent ids;
        * ``parents_spp`` maps each of those ids found in the table to its
          ``'damstrain'`` value.

    Raises:
        KeyError: If a record selected for the strain checks has no
            ``'damstrain'``/``'sirestrain'``, or a recombinant's parent is
            missing from ``ped``.
    '''
    td = preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)

    # First pass: pedigree of the requested samples.  ``key in d`` is used
    # instead of dict.has_key(), which was removed in Python 3.
    ped = {}
    for d in td:
        if d.get('id', '') in sampleids and 'sireid' in d and 'damid' in d:
            ped[d['id']] = (d['damid'], d['sireid'])

    # Every dam/sire id seen so far, as a set.  The old builtin ``reduce``
    # over tuple concatenation raised TypeError when ``ped`` was empty and
    # turned each lookup below into a linear scan.
    ped_parents = set()
    for dam_sire in ped.values():
        ped_parents.update(dam_sire)

    # Second pass: pedigree of those parents, merged into ``ped``.
    parent_ped = {}
    for d in td:
        if d.get('id', '') in ped_parents and 'sireid' in d and 'damid' in d:
            parent_ped[d['id']] = (d['damid'], d['sireid'])
    ped.update(parent_ped)

    recombinants = [d['id'] for d in td
                    if d.get('id', '') in ped
                    and ',' in d['damstrain'] and ',' in d['sirestrain']]

    # Two generations up from each recombinant (f2 -> f1 -> g0).
    g0_ids = set()
    for f2 in recombinants:
        for f1 in ped[f2]:
            for g0 in ped[f1]:
                g0_ids.add(g0)
    parents = list(g0_ids)

    parents_spp = dict((d['id'], d['damstrain']) for d in td
                       if d.get('id', '') in g0_ids)

    return ped, recombinants, parents, parents_spp
    parser.add_argument('-mr','--mapreads_argstr',default="''",type=eval, \
                        help='additional arguments for map_reads_by_indiv-stampy.py. \nMust be single AND double quoted for spaces, e.g. "\'--cleanup --fast_merge --reduce_reads\'"'+ds)
    
    parser.add_argument('reference_fasta',help='reference for stampy')
    parser.add_argument('outroot',help='directory for logfile and vcf creation')
    parser.add_argument('projects',nargs='+',help='project names from DB_library_data to include in run')

    opts = parser.parse_args()

    if opts.vcfname is None:
        vcfname = '-'.join(opts.projects)
    else:
        vcfname = opts.vcfname

    index_lookup = preprocess_radtag_lane.no_net_get_table_as_dict(multiplex_idx_db,tcp_host)
    td = preprocess_radtag_lane.no_net_get_table_as_dict(config.LIBRARY_DATA,tcp_host)

    td = [d for d in td if d.get('project',None) in opts.projects and d.has_key('datapath')]

    print >> sys.stderr, '%s individual records found for projects %s' % (len(td),opts.projects)

    preprocess_targets = []
    expected_fq_d = {}

    if opts.force_db_id:
        transtable,failures = preprocess_radtag_lane.get_legacy_to_DB_lookup(td)
    
    for d in td: #UPDATE FOR DB ID LOOKUP
        if opts.force_db_id:
            if d['sampleid'] in transtable: