def make_nan(wavespaths, chunk, i):
    """Write NA-filled placeholder files for wave *i* of *chunk*.

    Takes the fam (individuals) from ``wp0.filepaths[i]`` and the
    map/ngt/info/dosage structure from the first file in *wavespaths*,
    blanks the info columns and every individual's dosage with "NA",
    and writes the result under ``temp/<mf>/<chunk>_<i>``.

    NOTE(review): relies on module-level globals ``wp0`` and ``mf``
    (defined elsewhere in this file) — confirm they are set before calling.
    """
    fam = h.read_fam(wp0.filepaths[i])

    # Wave 0 of this chunk serves as the structural template.
    template = wavespaths.filepaths[0]
    gmap = h.read_map(template)
    ngt = h.read_ngt(template)

    info = h.read_info(wavespaths.infopaths[0])
    for col in ('info', 'freq', 'a1', 'a2'):
        info[col] = "NA"

    # Only the variant-identity columns are kept; all genotype columns
    # are re-added below as "NA" placeholders.
    dosage = pd.read_csv(template + '.gz', sep=' ', compression='gzip',
                         usecols=['SNP', 'A1', 'A2'])
    na_column = pd.Series("NA", index=dosage.index)
    for j in range(len(fam.famID)):
        dosage[fam.famID[j]] = na_column
        dosage[fam.indID[j]] = na_column

    # Common output prefix for every file of this (chunk, wave) pair.
    out = 'temp/' + mf + '/' + chunk + '_' + str(i)
    h.write_fam(out, fam)
    h.write_map(out, gmap)
    h.write_ngt(out, ngt)
    h.write_info(out, info)
    dosage.to_csv(out + '.gz', sep=' ', index=False, compression='gzip')
# --- Driver: load each wave and the merged dataset, then assemble the
# --- consistency checks to run. Helper functions come from ``h``.

# Parse command-line options; h.replace presumably expands/normalizes the
# wave path templates from the parsed settings — TODO confirm against h.
s = h.parse()
s.wavepaths = h.replace(s)

# Per-wave metadata (one entry per wave file).
wave_maps = h.read_wave_maps(s.wavepaths)
wave_fams = h.read_wave_fams(s.wavepaths)

# Merged-dataset metadata.
merged_map = h.read_map(s.mergepath)
merged_fam = h.read_fam(s.mergepath)

# Optional subsetting: if an individual list was supplied, keep only those
# individuals in the merged fam (rows whose indID is in the list).
include_inds = h.read_include_inds(s.indlist)
if include_inds:
    merged_fam = merged_fam[merged_fam["indID"].isin(include_inds)]

# Dosage contents: individuals and SNPs for each wave and for the merged file.
wave_inds, wave_snps = h.read_wave_dosages(s.wavepaths.filepaths)
merged_inds, merged_snps = h.read_dosage(s.mergepath)
merged_info = h.read_info(s.mergepath)

# Consistency checks to run; each name is a check function defined elsewhere
# in this file. "original__*" validate the per-wave inputs, "merged__*" the
# merged output, "compare__*" cross-check waves against the merged result.
checks = [
    original__inds_in_dosage_and_fam_are_identical,
    original__variants_in_dosage_and_map_are_identical,
    original__variants_are_sorted_by_position,
    original__indIDs_are_unique,
    original__each_variant_has_just_one_position,
    merged__variants_are_sorted_by_position,
    merged__inds_in_dosage_and_fam_are_identical,
    merged__variants_in_dosage_and_map_are_identical,
    compare__variants_in_merged_same_as_union_of_variants_in_original,
    compare__order_of_variants_in_waves_same_as_in_merged,
]