def processGenome(genomefile, plinkexe, bedprefix, prefix, nosex=False,
                  verbose=False, threads=-1):
    """
    Read the genome file (as produced by plink option --genome), collect the
    related individuals to drop and remove them from the bed fileset.

    The genome file is read in chunks of 10000 rows and each chunk is handed
    to ``prcgenthreads`` in parallel; that helper decides which individuals
    are flagged (the original documentation states the criterion is a pi_hat
    over 0.05 -- NOTE(review): confirm against ``prcgenthreads``). The
    de-duplicated result is written to ``<prefix>_relateds.toexclude`` and
    passed to plink's ``--remove`` to write a new fileset under ``prefix``.

    :param str genomefile: filename of the genome
    :param str plinkexe: Path and executable of plink
    :param str bedprefix: Prefix for the bed fileset
    :param str prefix: Prefix for the outputs
    :param bool nosex: If True, append ``--allow-no-sex`` to the plink call
    :param bool verbose: If True, print progress messages
    :param int threads: Number of parallel jobs; any falsy value means -1
    """
    if not threads:
        threads = -1
    if verbose:
        print(' Processing %s file' % (genomefile))
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True and works across pandas versions.
    gen = pd.read_table(genomefile, sep=r'\s+', iterator=True,
                        chunksize=10000)
    rels = Parallel(n_jobs=int(threads))(
        delayed(prcgenthreads)(ch) for ch in gen)
    # De-duplicate once so the file written and the count reported agree.
    rels = pd.concat(rels).drop_duplicates()
    excludefile = '%s_relateds.toexclude' % (prefix)
    rels.to_csv(excludefile, sep=' ', header=False, index=False)
    if verbose:
        # shape[0] is the number of rows (individuals); shape[1] would be the
        # number of columns.
        print('\t %d Individuals excluded as relative of other individuals' % (
            rels.shape[0]))
    plremov = ('%s --bfile %s --remove %s --make-bed --keep-allele-order '
               '--out %s')
    if nosex:
        plremov += ' --allow-no-sex'
    plremov = plremov % (plinkexe, bedprefix, excludefile, prefix)
    if verbose:
        print(' Removing individuals from file %s' % (excludefile))
    execline(plremov)