Example #1
0
def processGenome(genomefile,
                  plinkexe,
                  bedprefix,
                  prefix,
                  nosex=False,
                  verbose=False,
                  threads=-1):
    """
    Read a plink ``--genome`` report, collect the individuals flagged as
    related, write them to an exclusion list and run plink to produce a new
    bed fileset without them.

    The genome file is read in chunks of 10000 rows and every chunk is handed
    to ``prcgenthreads`` in parallel. The relatedness criterion itself lives
    in ``prcgenthreads`` (the previous version of this docstring described it
    as pi_hat over 0.05 -- confirm against that helper).

    Side effects: writes ``<prefix>_relateds.toexclude`` (space separated, no
    header, no index) and runs plink with ``--out <prefix>``.

    :param str genomefile: filename of the genome
    :param str plinkexe: Path and executable of plink
    :param str bedprefix: Prefix for the bed fileset
    :param str prefix: Prefix for the outputs
    :param bool nosex: If True, append ``--allow-no-sex`` to the plink call
    :param bool verbose: If True, print progress messages
    :param int threads: Number of joblib workers; a falsy value (0 or None)
        is replaced by -1
    """
    if not threads:
        threads = -1
    if verbose:
        print('    Processing %s file' % (genomefile))
    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
    gen = pd.read_table(genomefile,
                        sep=r'\s+',
                        iterator=True,
                        chunksize=10000)
    rels = Parallel(n_jobs=int(threads))(delayed(prcgenthreads)(ch)
                                         for ch in gen)
    # De-duplicate once so the file written and the count reported agree.
    # NOTE(review): assumes prcgenthreads returns one row per individual
    # to exclude -- confirm against its definition.
    rels = pd.concat(rels).drop_duplicates()
    excludefile = '%s_relateds.toexclude' % (prefix)
    rels.to_csv(excludefile,
                sep=' ',
                header=False,
                index=False)
    if verbose:
        # Rows (shape[0]) are the excluded entries; shape[1] would be the
        # number of columns.
        print('\t %d Individuals excluded as relative of other individuals' %
              (rels.shape[0]))
    plremov = (
        '%s --bfile %s --remove %s --make-bed --keep-allele-order --out %s')
    if nosex:
        plremov += ' --allow-no-sex'
    plremov = plremov % (plinkexe, bedprefix, excludefile, prefix)
    if verbose:
        print('    Removing individuals from file %s' % (excludefile))
    execline(plremov)