Exemple #1
0
def read_gm(opts, logger):
    """Run the cleangwas QC step on the file named by ``opts.exposure_gm``.

    A deep copy of ``opts`` is used, so the caller's options object is not
    modified. On that copy ``infp`` is pointed at ``exposure_gm`` and, after
    the header has been parsed, ``rmpali``, ``rmindel`` and ``unique`` are
    all forced to ``False`` before ``cleangwas.qc`` is called.

    Args:
        opts: Options object; ``exposure_gm`` and ``sep`` are read here.
        logger: Passed through to ``cleangwas.parse_header`` and
            ``cleangwas.qc``.

    Returns:
        The value returned by ``cleangwas.qc``.
    """
    gm_opts = copy.deepcopy(opts)
    gm_opts.infp = gm_opts.exposure_gm
    column_names = cleangwas.parse_header(
        cleangwas.read_header(gm_opts.infp, gm_opts.sep),
        gm_opts,
        logger,
        cleangwas.default_cnames,
    )
    # These switches are turned off only after the header has been parsed,
    # exactly as before.
    for flag in ('rmpali', 'rmindel', 'unique'):
        setattr(gm_opts, flag, False)
    return cleangwas.qc(gm_opts, column_names, logger)
Exemple #2
0
def read_exp(opts, logger):
    """Read the exposure file and pass it through the cleangwas pipeline.

    Operates on a deep copy of ``opts`` (with ``infp`` set to
    ``opts.exposure``), then applies, in order: ``cleangwas.qc``,
    ``cleangwas.selectSNP``, ``cleangwas.resort_col`` and
    ``cleangwas.truncate``.

    Args:
        opts: Options object; ``exposure`` and ``sep`` are read here.
        logger: Passed through to the cleangwas helpers that accept it.

    Returns:
        The value returned by ``cleangwas.truncate``.
    """
    exp_opts = copy.deepcopy(opts)
    exp_opts.infp = exp_opts.exposure

    raw_header = cleangwas.read_header(exp_opts.infp, exp_opts.sep)
    column_names = cleangwas.parse_header(
        raw_header, exp_opts, logger, cleangwas.default_cnames)

    cleaned = cleangwas.qc(exp_opts, column_names, logger)
    selected = cleangwas.selectSNP(exp_opts, cleaned, logger)
    return cleangwas.truncate(exp_opts, cleangwas.resort_col(selected))
Exemple #3
0
def read_outc(opts, logger):
    """Read the outcome file, pre-filtered to the SNPs listed in ``include``.

    The outcome file (``opts.outcome``) is streamed in chunks; only rows
    whose upper-cased ``RS`` value appears in the ``include`` file are kept.
    The surviving rows are serialised to an in-memory TSV that is then fed
    through ``cleangwas.qc``, ``selectSNP``, ``resort_col`` and ``truncate``.

    Side effects:
        * every value in ``cleangwas.del_no`` is reset to 0;
        * the ``include`` file is deleted (best effort) before returning.

    Args:
        opts: Options object; ``outcome`` and ``sep`` are read. A deep copy
            is modified (``infp``, ``pThresh = 1``, ``include``), the
            caller's object is left alone.
        logger: Wrapper whose ``.logger`` attribute is used for messages and
            which is passed on to the cleangwas helpers.

    Returns:
        The result of the cleangwas pipeline, or an empty ``DataFrame`` when
        no row of the outcome file matches the include list.

    Raises:
        SystemExit: with status 1 when a chunk has no ``RS`` column.
    """
    # Reset the counters kept on the cleangwas module.
    for key in cleangwas.del_no:
        cleangwas.del_no[key] = 0

    include_path = 'include'
    opts = copy.deepcopy(opts)
    opts.infp = opts.outcome
    opts.pThresh = 1
    opts.include = include_path
    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    insnps = pd.read_csv(include_path,
                         header=None,
                         names=["RS"],
                         sep=r'\s+')
    insnps['RS'] = insnps['RS'].str.upper()

    converter = cleangwas.get_converter(cnames)
    # Matching chunks are collected and concatenated once at the end;
    # DataFrame.append re-copied the accumulated frame on every call and no
    # longer exists in pandas >= 2.0.
    kept_chunks = []
    reader = pd.read_csv(opts.infp,
                         sep=opts.sep,
                         header=0,
                         names=cnames,
                         dtype=str,
                         iterator=True,
                         chunksize=2000000)
    for chunk in reader:
        for column, convert in converter.items():
            chunk[column] = chunk[column].apply(convert)
        if 'RS' not in chunk.columns:
            logger.logger.warning(
                'Can not identify RS column for {0}\nExiting'.format(
                    opts.infp))
            # Same effect as the interactive ``exit(1)`` helper, without
            # depending on the ``site`` module having injected it.
            raise SystemExit(1)
        chunk['RS'] = chunk['RS'].str.upper()
        chunk = chunk[chunk['RS'].isin(insnps['RS'])]
        if not chunk.empty:
            kept_chunks.append(chunk)

    if kept_chunks:
        total_df = pd.concat(kept_chunks, sort=False)
        content = total_df.to_csv(path_or_buf=None,
                                  sep='\t',
                                  na_rep='NA',
                                  float_format='%g',
                                  encoding='utf-8',
                                  index=False)
        # Hand the filtered rows to the QC step as a file-like object.
        opts.infp = StringIO(content)
        df = cleangwas.qc(opts, cnames, logger)
        df = cleangwas.selectSNP(opts, df, logger)
        df = cleangwas.resort_col(df)
        df = cleangwas.truncate(opts, df)
    else:
        logger.logger.info('No common SNPs among exposure and outcome.')
        df = pd.DataFrame()

    # Best-effort cleanup: a missing or undeletable file is not an error,
    # but interrupts and other non-OS exceptions must not be swallowed.
    try:
        os.remove(include_path)
    except OSError:
        pass
    return df
Exemple #4
0
def read_outc(opts, logger):
    """Read the outcome file and pass it through the cleangwas pipeline.

    Side effects:
        * every value in ``cleangwas.del_no`` is reset to 0;
        * the ``include`` file is deleted (best effort) before returning.

    Args:
        opts: Options object; ``outcome`` and ``sep`` are read. A deep copy
            is modified (``infp``, ``pThresh = 1``, ``include = 'include'``),
            the caller's object is left alone.
        logger: Passed through to the cleangwas helpers that accept it.

    Returns:
        The value returned by ``cleangwas.truncate``.
    """
    # Reset the counters kept on the cleangwas module.
    for key in cleangwas.del_no:
        cleangwas.del_no[key] = 0

    opts = copy.deepcopy(opts)
    opts.infp = opts.outcome
    opts.pThresh = 1
    opts.include = 'include'
    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)
    df = cleangwas.qc(opts, cnames, logger)
    df = cleangwas.selectSNP(opts, df, logger)
    df = cleangwas.resort_col(df)
    df = cleangwas.truncate(opts, df)

    # Best-effort cleanup: a missing or undeletable file is not an error,
    # but interrupts and other non-OS exceptions must not be swallowed.
    try:
        os.remove('include')
    except OSError:
        pass
    return df
Exemple #5
0
def _concat_non_empty(frames):
    """Stack the non-empty DataFrames in *frames* into a single DataFrame.

    When every frame is empty (or *frames* is empty) the list is returned
    unchanged, which is what ``mr4gm`` has always returned in that case.
    """
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return frames
    return pd.concat(non_empty, axis=0, ignore_index=True, sort=False)


def _remove_if_present(path):
    """Delete *path*, ignoring OS-level failures such as a missing file."""
    try:
        os.remove(path)
    except OSError:
        pass


def mr4gm(opts, logger):
    """Run ``mr4general`` once per distinct ``Phenotype`` in the GM exposure file.

    Steps:
        1. Read ``opts.exposure_gm`` (whitespace separated, all columns as
           strings) and write its ``RS`` column to the file ``include``.
        2. Call ``read_outc`` to obtain the outcome data.
        3. For each distinct value of the ``Phenotype`` column (in order of
           first appearance), write that subset and the outcome data to the
           temporary files ``.exposure_gm.txt`` / ``.outcome_gm.txt``, point
           ``opts`` at them and call ``mr4general``. The temporary files are
           removed after every call, even if it raises.
        4. Concatenate the per-phenotype outputs.

    Args:
        opts: Options object. A deep copy is modified; ``exposure_gm``,
            ``sep`` and ``heterogeneity`` are read here.
        logger: Passed through to the helpers that accept it.

    Returns:
        A 5-tuple ``(mr_results, mr_heters, mr_pleios, mr_datas,
        mr_pleiosnps)``. Each of the first four is a concatenated DataFrame,
        or, when no non-empty frame was produced, the raw list of
        per-phenotype outputs. ``mr_pleiosnps`` is always a DataFrame; it is
        empty when ``opts.heterogeneity == 'none'`` or nothing non-empty was
        produced.
    """
    opts = copy.deepcopy(opts)
    opts.infp = opts.exposure_gm
    header = cleangwas.read_header(opts.infp, opts.sep)
    cnames = cleangwas.parse_header(header, opts, logger,
                                    cleangwas.default_cnames)
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    all_df = pd.read_csv(opts.exposure_gm,
                         header=0,
                         names=cnames,
                         sep=r'\s+',
                         dtype=str)
    all_df['RS'].to_csv('include',
                        header=False,
                        sep='\t',
                        na_rep='NA',
                        float_format='%g',
                        encoding='utf-8',
                        index=False)
    out_df = read_outc(opts, logger)

    exposure_tmp = '.exposure_gm.txt'
    outcome_tmp = '.outcome_gm.txt'
    tsv_options = dict(sep='\t',
                       na_rep='NA',
                       float_format='%g',
                       encoding='utf-8',
                       index=False)

    mr_results = []
    mr_heters = []
    mr_pleios = []
    mr_datas = []
    mr_pleiosnps = []
    # Only 'Phenotype' is used as a grouping column; extend this tuple to
    # group by further columns.
    group_columns = ('Phenotype',)
    for phe in [col for col in all_df.columns if col in group_columns]:
        # unique() keeps first-appearance order, so the order of the output
        # rows is reproducible between runs (iterating a set of strings is
        # not).
        for x in all_df[phe].unique():
            try:
                all_df[all_df[phe] == x].to_csv(path_or_buf=exposure_tmp,
                                                **tsv_options)
                out_df.to_csv(path_or_buf=outcome_tmp, **tsv_options)
                opts.exposure = exposure_tmp
                opts.outcome = outcome_tmp
                opts.exp_name = x
                opts.exposure_gm = None
                mr_result, mr_heter, mr_pleio, mr_data, mr_pleiosnp = (
                    mr4general(opts, logger))
            finally:
                # Remove each file independently so that one failed removal
                # does not leave the other file behind.
                _remove_if_present(exposure_tmp)
                _remove_if_present(outcome_tmp)
            mr_results.append(mr_result)
            mr_heters.append(mr_heter)
            mr_pleios.append(mr_pleio)
            mr_datas.append(mr_data)
            mr_pleiosnps.append(mr_pleiosnp)

    mr_results = _concat_non_empty(mr_results)
    mr_heters = _concat_non_empty(mr_heters)
    mr_pleios = _concat_non_empty(mr_pleios)
    mr_datas = _concat_non_empty(mr_datas)

    # The per-phenotype pleiotropy-SNP outputs are only inspected when
    # heterogeneity handling is enabled.
    pleiosnp_frames = []
    if opts.heterogeneity != 'none':
        pleiosnp_frames = [frame for frame in mr_pleiosnps if not frame.empty]
    if pleiosnp_frames:
        mr_pleiosnps = pd.concat(pleiosnp_frames,
                                 axis=0,
                                 ignore_index=True,
                                 sort=False)
    else:
        mr_pleiosnps = pd.DataFrame()
    return mr_results, mr_heters, mr_pleios, mr_datas, mr_pleiosnps