Example #1
0
def make_nan(wavespaths, chunk, i):
    """Write "NA"-filled placeholder files for wave *i* of the given chunk.

    Uses wave i's fam file for the sample list, and wave 0's map/ngt/info
    and dosage files as structural templates; every info field and every
    per-sample dosage column is set to the string "NA".  Output is written
    to 'temp/<mf>/<chunk>_<i>.*' (``mf`` is a module-level global).
    """
    # Fix: the original read the fam via an undefined name `wp0` (NameError);
    # the paths object is the `wavespaths` parameter.
    chunkfam = h.read_fam(wavespaths.filepaths[i])
    # NOTE(review): map/ngt/info are taken from wave 0 as templates —
    # confirm this is intentional rather than filepaths[i]/infopaths[i].
    chunkmap = h.read_map(wavespaths.filepaths[0])
    chunkngt = h.read_ngt(wavespaths.filepaths[0])
    chunkinfo = h.read_info(wavespaths.infopaths[0])
    # Blank out every informative field; only the structure is kept.
    chunkinfo['info'] = "NA"
    chunkinfo['freq'] = "NA"
    chunkinfo['a1'] = "NA"
    chunkinfo['a2'] = "NA"

    # Keep only the identifier columns of the template dosage file, then add
    # one all-"NA" column per (famID, indID) pair in this chunk's fam.
    dosage = pd.read_csv(wavespaths.filepaths[0] + '.gz', sep=' ', compression='gzip', usecols=['SNP', 'A1', 'A2'])
    for fam_id, ind_id in zip(chunkfam.famID, chunkfam.indID):
        dosage[fam_id] = pd.Series("NA", index=dosage.index)
        dosage[ind_id] = pd.Series("NA", index=dosage.index)

    # Hoist the shared output-path prefix instead of rebuilding it 5 times.
    outpath = 'temp/' + mf + '/' + chunk + '_' + str(i)
    h.write_fam(outpath, chunkfam)
    h.write_map(outpath, chunkmap)
    h.write_ngt(outpath, chunkngt)
    h.write_info(outpath, chunkinfo)
    dosage.to_csv(outpath + '.gz', sep=' ', index=False, compression='gzip')
Example #2
0
File: check.py Project: ymer/merge
        df["previ"] = df.i.shift()
        df2 = df[~(df.previ + 1 == df.i)]

        if not pd.Index(df.i).is_monotonic:
            return False

    return True


# Parse command-line settings and resolve the per-wave file paths.
s = h.parse()
s.wavepaths = h.replace(s)

# Load the per-wave map/fam files and the merged map/fam for comparison.
wave_maps = h.read_wave_maps(s.wavepaths)
wave_fams = h.read_wave_fams(s.wavepaths)
merged_map = h.read_map(s.mergepath)
merged_fam = h.read_fam(s.mergepath)

# If an individual whitelist was supplied, restrict the merged fam to it
# before any checks run (an empty/missing list keeps everyone).
include_inds = h.read_include_inds(s.indlist)
if include_inds:
    merged_fam = merged_fam[merged_fam["indID"].isin(include_inds)]
# Dosage headers yield the individual and variant lists on both sides.
wave_inds, wave_snps = h.read_wave_dosages(s.wavepaths.filepaths)
merged_inds, merged_snps = h.read_dosage(s.mergepath)
merged_info = h.read_info(s.mergepath)

checks = [
    original__inds_in_dosage_and_fam_are_identical,
    original__variants_in_dosage_and_map_are_identical,
    original__variants_are_sorted_by_position,
    original__indIDs_are_unique,
    original__each_variant_has_just_one_position,
Example #3
0
import pandas as pd
import helper as h

# Build a covariate file mapping each merged individual to the wave number
# (1-based) it originated from.

# Parse arguments and resolve the per-wave file paths.
s = h.parse()
s.wavepaths = h.replace(s)

# Only famID/indID are needed to match individuals across files.
wave_fams = h.read_wave_fams(s.wavepaths, usecols=["famID", "indID"])

# Idiom fix: number waves with enumerate's `start` parameter instead of
# the manual `i + 1` offset.
for wave_no, fam in enumerate(wave_fams, start=1):
    fam["wave"] = wave_no

merged_fam = h.read_fam(s.mergepath, usecols=["famID", "indID"])

# Stack all per-wave fams, then left-merge so every merged individual is
# kept (anyone absent from all waves gets NaN in the "wave" column).
waves = pd.concat(wave_fams)
merged_fam = pd.merge(merged_fam, waves, how='left', on=['famID', 'indID'])

# Space-separated covariate file, no header or index (plink-style input).
merged_fam.to_csv(s.checkfolder + "covar.txt", sep=" ", header=False, index=False)