df = pd.merge(merged_map, wave_map, on="SNP", how="left").dropna() df["previ"] = df.i.shift() df2 = df[~(df.previ + 1 == df.i)] if not pd.Index(df.i).is_monotonic: return False return True s = h.parse() s.wavepaths = h.replace(s) wave_maps = h.read_wave_maps(s.wavepaths) wave_fams = h.read_wave_fams(s.wavepaths) merged_map = h.read_map(s.mergepath) merged_fam = h.read_fam(s.mergepath) include_inds = h.read_include_inds(s.indlist) if include_inds: merged_fam = merged_fam[merged_fam["indID"].isin(include_inds)] wave_inds, wave_snps = h.read_wave_dosages(s.wavepaths.filepaths) merged_inds, merged_snps = h.read_dosage(s.mergepath) merged_info = h.read_info(s.mergepath) checks = [ original__inds_in_dosage_and_fam_are_identical, original__variants_in_dosage_and_map_are_identical, original__variants_are_sorted_by_position,
# Build a covariate file mapping each individual in the merged fam to the
# genotyping wave (1-based) that individual's fam entry came from.
import pandas as pd

import helper as h

settings = h.parse()
settings.wavepaths = h.replace(settings)

# Tag every per-wave fam table with its 1-based wave index.
wave_fams = h.read_wave_fams(settings.wavepaths, usecols=["famID", "indID"])
for wave_number, fam in enumerate(wave_fams, start=1):
    fam["wave"] = wave_number

merged_fam = h.read_fam(settings.mergepath, usecols=["famID", "indID"])

# Left-join so every merged individual is kept, even without a wave match
# (unmatched rows get a missing value in the "wave" column).
all_waves = pd.concat(wave_fams)
merged_fam = pd.merge(merged_fam, all_waves, how="left", on=["famID", "indID"])

# Space-separated covariate file: famID indID wave, no header or row index.
merged_fam.to_csv(settings.checkfolder + "covar.txt", sep=" ", header=False, index=False)