Ejemplo n.º 1
0
import gpugwas.io as gwasio
import cudf

# Smoke test for the gpugwas I/O helpers: every loader is run once on the
# sample files under data/ and its result is printed for manual inspection.
test_vcf_path = "data/test.vcf"

# Loader 1: gwasio.load_vcf with "*" for both the INFO and FORMAT keys
# (presumably a wildcard selecting every key -- confirm in gpugwas.io).
# NOTE: during development this call was also pointed at a larger local VCF
# and the resulting frame was written out with to_parquet().
print("Test loading VCF to DF")
vcf_df = gwasio.load_vcf(
    test_vcf_path,
    info_keys=["*"],
    format_keys=["*"],
)
print(vcf_df)

# Annotation loader.
print("Test loading annotation file")
annotation_path = "data/1kg_annotations.txt"
ann_df = gwasio.load_annotations(annotation_path)
print(ann_df)

# Loader 2: gwasio.load_vcf_variantworks on the same VCF, restricted to the
# AC/AF INFO keys and the GT FORMAT key.
print("Test loading VCF to DF (Method 2)")
vcf_df_2 = gwasio.load_vcf_variantworks(
    test_vcf_path,
    info_keys=["AC", "AF"],
    format_keys=["GT"],
)
print(vcf_df_2)

# Reached only if none of the loaders above raised.
print("===== TEST PASSED ====")
Ejemplo n.º 2
0
# Suppress every Python warning from this point on.
warnings.simplefilter('ignore')

# Command-line interface. Each option has a default, so the script can be
# started without any arguments.
parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
parser.add_argument('--vcf_path', default='./data/test.vcf')
parser.add_argument('--annotation_path', default='./data/1kg_annotations.txt')
parser.add_argument('--workdir', default='./temp/')
args = parser.parse_args()

# Initialize the memory pool to 10 GB (10**10 bytes).
# The pool size is a byte count, so it is passed as an int rather than the
# float literal 1e10; the value itself is unchanged.
cudf.set_allocator(pool=True, initial_pool_size=int(1e10))
# Install RMM's allocator for `cp` (presumably CuPy -- the import is outside
# this excerpt) so its allocations go through RMM as well.
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)

# Load data
print("Loading data")
# In this version of the script load_vcf is unpacked into two values: the
# variant frame and a feature mapping.
vcf_df, feature_mapping = gwasio.load_vcf(args.vcf_path,
                                          info_keys=["AF"],
                                          format_keys=["GT", "DP"])
print(vcf_df.head())
print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)
#print(ann_df)

# Start benchmarking after I/O, so loading time is excluded from the timing.
t0 = time.time()

# Filter data: samples first, then variants; each step rebinds vcf_df.
print("Filtering samples")
vcf_df = gwasfilter.filter_samples(vcf_df)
print(vcf_df.head())
print("Filtering variants")
vcf_df = gwasfilter.filter_variants(vcf_df)
Ejemplo n.º 3
0
import gpugwas.filter as gwasfilter
import gpugwas.algorithms as algos
import gpugwas.viz as viz

#import gpugwas.processing as gwasproc

# Command-line options, registered in the order listed below; every option
# carries a default so the script can run without arguments.
parser = argparse.ArgumentParser(description='Run GPU GWAS Pipeline')
for option, fallback in (
    ('--vcf_path', './data/test.vcf'),
    ('--annotation_path', './data/1kg_annotations.txt'),
    ('--workdir', './temp/'),
):
    parser.add_argument(option, default=fallback)
args = parser.parse_args()

# I/O stage: read the VCF (INFO key AF; FORMAT keys GT and DP) and then the
# annotation file.
print("Loading data")
vcf_df = gwasio.load_vcf(
    args.vcf_path,
    info_keys=["AF"],
    format_keys=["GT", "DP"],
)
print(vcf_df.head())
print("Loading annotations")
ann_df = gwasio.load_annotations(args.annotation_path)

# The timer starts here so the I/O above is excluded from the benchmark.
t0 = time.time()

# Filtering stage: samples first, then variants; each step rebinds vcf_df.
print("Filtering samples")
vcf_df = gwasfilter.filter_samples(vcf_df)
print(vcf_df.head())
print("Filtering variants")
vcf_df = gwasfilter.filter_variants(vcf_df)