[antigen_type]_where_homozygous.npy: a boolean array indicating which columns (tiles)
        were kept from the population after removing poorly sequenced columns.
        If settings.REMOVE_HOMOZYGOUS is False, every entry of the array is True
    python_variables.py: python file defining training_names and test_names
"""
import helping_functions as fns
from sklearn import preprocessing
import arvados  # Import the Arvados sdk module
import imp
import os
import time
import numpy as np

########################################################################################################################
# Read constants
# Number of retries handed to the output collection below; read from the
# 'num-retries' job parameter.
NUM_RETRIES = int(arvados.getjobparam('num-retries'))
# Validate with an explicit raise rather than `assert`: assertions are stripped
# when Python runs with -O, which would let a non-positive value through.
if NUM_RETRIES <= 0:
    raise ValueError("'num-retries' must be strictly positive")

# The 'antigen-type' job parameter, coerced to a string.
antigen_type = str(arvados.getjobparam('antigen-type'))
########################################################################################################################
#Set-up collection and logging file to write out to
out = arvados.collection.Collection(num_retries=NUM_RETRIES)
# Timing log written inside the output collection.
time_logging_fh = out.open('time_log.txt', 'w')
########################################################################################################################
# Load settings
# The 'settings' job parameter is resolved to a path and loaded as a Python
# module named 'settings'; t0/t1 bracket the load so its wall-clock duration
# can be logged.
t0 = time.time()
settings = imp.load_source('settings', arvados.get_job_param_mount('settings'))
t1 = time.time()
time_logging_fh.write('Loading settings %fs\n' % (t1 - t0))
########################################################################################################################
#Get path lengths and path integers
Example #2
0
#!/usr/bin/env python

# Import the hashlib module (part of the Python standard library) to compute md5.
import hashlib

# Import the Arvados sdk module
import arvados

# Script prologue: read the job's 'input' collection and prepare an output
# collection containing a single file named "md5sum.txt".

# Get information about the task from the environment
# (this_task is not referenced again in the visible part of the script)
this_task = arvados.current_task()

# Get the "input" field from "script_parameters" on the job creation object
this_job_input = arvados.getjobparam('input')

# Create the object access to the collection referred to in the input
collection = arvados.CollectionReader(this_job_input)

# Create an object to write a new collection as output
out = arvados.CollectionWriter()

# Set the name of output file within the collection
out.set_current_file_name("md5sum.txt")

# Get an iterator over the files listed in the collection
all_files = collection.all_files()

# Iterate over each file
# NOTE(review): only the first statement of the loop body is visible in this
# excerpt; a fresh md5 digest object is created for every input file.
for input_file in all_files:
    # Create the object that will actually compute the md5 hash
    digestor = hashlib.new('md5')
Example #3
0
import json
import os
import time
import numpy as np

def get_bool(param):
    """Parse a job parameter into a boolean.

    Accepted spellings (case-insensitive) are 'true'/'t' and 'false'/'f'.
    Values that are already ``bool`` are returned unchanged, so a parameter
    supplied as a real boolean works as well as its string form.

    Args:
        param: the raw parameter value, normally a string.

    Returns:
        True or False.

    Raises:
        ValueError: if ``param`` is not one of the accepted spellings. This
            includes non-string values such as None, which previously failed
            with an unrelated AttributeError on ``.lower()``.
    """
    if isinstance(param, bool):
        return param
    try:
        # Duck-typed so that both str and (Python 2) unicode values are accepted.
        normalized = param.lower()
    except AttributeError:
        normalized = None
    if normalized in ('true', 't'):
        return True
    if normalized in ('false', 'f'):
        return False
    # (param,) keeps the formatting correct even if param is itself a tuple.
    raise ValueError("%s cannot be parsed as boolean. Please use true/false." % (param,))

# Read constants
########################################################################################################################
#Get name to save png file under
PNG_NAME = arvados.getjobparam('png-name')
#Get whether we should use quality or regular numpy files
QUALITY = get_bool(arvados.getjobparam('quality'))
#Get percentage of the tiles we should use in PCA
PERCENT_TO_RETRIEVE = float(arvados.getjobparam('percent-to-analyze'))
#Get if we should only load certain parts of the genome
# ACCEPTED_PATHS stays empty when getjobparam returns None for 'accepted-paths'.
ACCEPTED_PATHS = []
input_accepted_paths = arvados.getjobparam('accepted-paths')
if input_accepted_paths is not None:
    # The parameter is expected to be a JSON document; a parse failure is
    # re-raised with the offending input included in the message.
    try:
        ACCEPTED_PATHS = json.loads(input_accepted_paths)
    except ValueError:
        raise Exception("Unable to read 'accepted-paths' input as json input: %s" % (input_accepted_paths))
    # Every entry must unpack into a (lower_path, upper_path) pair; the lower
    # bound must be a lowercase hex string.
    for lower_path, upper_path in ACCEPTED_PATHS:
        assert re.match('^[0-9a-f]+$', lower_path) is not None, \
            "'accepted-paths' input contains an incorrect path hex string (%s) (it does not match '^[0-9a-f]+$')" % (lower_path)
Example #4
0
#!/usr/bin/python

import arvados
import re
import hashlib
import string

# Arvados API client for API version 'v1'.
api = arvados.api('v1')

# Module-level state, initialised empty here; how it is updated is not visible
# in this excerpt.
piece = 0
manifest_text = ""

# Look for paired reads

# Reader over the collection named by the 'reads' job parameter.
inp = arvados.CollectionReader(arvados.getjobparam('reads'))

manifest_list = []

def nextline(reader, start):
    """Find the next newline in ``reader``, scanning forward from ``start``.

    The reader is queried in 128-unit chunks via ``reader.readfrom(offset, 128)``
    until a chunk containing a newline is found or an empty chunk is returned.

    Args:
        reader: object exposing ``readfrom(offset, size)``; presumably returns
            up to ``size`` characters/bytes beginning at ``offset`` (confirm
            against the reader implementation).
        start: offset at which scanning begins.

    Returns:
        The index of the first newline inside the chunk in which it was found,
        or -1 if the reader ran out of data before a newline was seen.

    NOTE(review): the returned index is relative to the chunk, not to the
    original ``start``. When the newline lies more than 128 units past
    ``start`` the result does not say how many chunks were skipped. This is
    unchanged from the original behaviour; confirm callers expect it.
    """
    chunk_size = 128
    n = -1
    while True:
        r = reader.readfrom(start, chunk_size)
        if not r:
            # Empty chunk (str or bytes): no more data to scan.
            break
        # Use the find() method instead of string.find(), which does not exist
        # on Python 3; match the needle type to the chunk type.
        n = r.find(b"\n" if isinstance(r, bytes) else "\n")
        if n > -1:
            break
        start += chunk_size
    return n
import matplotlib as mpl # must be imported first to ensure matplotlib visualization works
mpl.use('Agg')

import matplotlib.pyplot as plt
import helping_functions as fns
from sklearn import cross_validation, preprocessing, grid_search, base
import arvados      # Import the Arvados sdk module
import re           # used for error checking
import imp
import os
import time
import numpy as np

########################################################################################################################
# Read constants
# Number of retries handed to the output collection below; read from the
# 'num-retries' job parameter.
NUM_RETRIES = int(arvados.getjobparam('num-retries'))
# Validate with an explicit raise rather than `assert`: assertions are stripped
# when Python runs with -O, which would let a non-positive value through.
if NUM_RETRIES <= 0:
    raise ValueError("'num-retries' must be strictly positive")

# The 'antigen-type' job parameter, coerced to a string.
antigen_type = str(arvados.getjobparam('antigen-type'))
########################################################################################################################
#Set-up collection and logging file to write out to
out = arvados.collection.Collection(num_retries=NUM_RETRIES)
# Two log files inside the output collection: one for timings, one general.
time_logging_fh = out.open('time_log.txt', 'w')
info_fh = out.open('log.txt', 'w')
########################################################################################################################
# Load settings
# The 'settings' job parameter is resolved to a path and loaded as a Python
# module named 'settings'; t0/t1 bracket the load so its wall-clock duration
# can be logged.
t0 = time.time()
settings = imp.load_source('settings', arvados.get_job_param_mount('settings'))
t1 = time.time()
time_logging_fh.write('Loading settings %fs\n' % (t1 - t0))
########################################################################################################################
Example #6
0
#!/usr/bin/python

import arvados
import re
import hashlib
import string

# Arvados API client for API version 'v1'.
api = arvados.api('v1')

# Module-level state, initialised empty here; how it is updated is not visible
# in this excerpt.
piece = 0
manifest_text = ""

# Look for paired reads

# Reader over the collection named by the 'reads' job parameter.
inp = arvados.CollectionReader(arvados.getjobparam('reads'))

manifest_list = []

# Hard-coded to False; the job-parameter lookup on this line is commented out.
chunking = False  #arvados.getjobparam('chunking')


def nextline(reader, start):
    n = -1
    while True:
        r = reader.readfrom(start, 128)
        if r == '':
            break
        n = string.find(r, "\n")
        if n > -1:
            break
        else:
import matplotlib as mpl  # must be imported first to ensure matplotlib visualization works
mpl.use('Agg')

import matplotlib.pyplot as plt
import helping_functions as fns
from sklearn import cross_validation, preprocessing, grid_search, base
import arvados  # Import the Arvados sdk module
import re  # used for error checking
import imp
import os
import time
import numpy as np

########################################################################################################################
# Read constants
# Number of retries handed to the output collection below; read from the
# 'num-retries' job parameter.
NUM_RETRIES = int(arvados.getjobparam('num-retries'))
# Validate with an explicit raise rather than `assert`: assertions are stripped
# when Python runs with -O, which would let a non-positive value through.
if NUM_RETRIES <= 0:
    raise ValueError("'num-retries' must be strictly positive")

# The 'antigen-type' job parameter, coerced to a string.
antigen_type = str(arvados.getjobparam('antigen-type'))
########################################################################################################################
#Set-up collection and logging file to write out to
out = arvados.collection.Collection(num_retries=NUM_RETRIES)
# Two log files inside the output collection: one for timings, one general.
time_logging_fh = out.open('time_log.txt', 'w')
info_fh = out.open('log.txt', 'w')
########################################################################################################################
# Load settings
# The 'settings' job parameter is resolved to a path and loaded as a Python
# module named 'settings'; t0/t1 bracket the load so its wall-clock duration
# can be logged.
t0 = time.time()
settings = imp.load_source('settings', arvados.get_job_param_mount('settings'))
t1 = time.time()
time_logging_fh.write('Loading settings %fs\n' % (t1 - t0))
Example #8
0
import numpy as np


def convert_to_tile_int(position, path_lengths):
    """Encode ``position`` as an integer built from a path index and a step.

    The path index is the first ``k`` for which ``position`` is not greater
    than ``path_lengths[k + 1]``; the step is ``position - path_lengths[k]``.
    Both are rendered as hex (path padded to 3 digits, step to 4), concatenated,
    and the resulting hex string is parsed back into an int.

    Args:
        position: integer position to encode.
        path_lengths: indexable sequence of integers; presumably cumulative
            tile counts per path starting at 0 (confirm against the caller).

    Returns:
        ``int(path_hex + step_hex, 16)``.

    Raises:
        IndexError: if ``position`` exceeds every entry of ``path_lengths[1:]``.

    NOTE(review): a position exactly equal to ``path_lengths[k + 1]`` is
    assigned to path ``k``, and a step needing more than four hex digits makes
    the concatenated string longer than seven digits. Confirm both are intended.
    """
    def _hex_digits(value, width):
        # hex() text without the leading '0'/'x' characters and without a
        # Python 2 long 'L' suffix, left-padded with zeros to `width`.
        return hex(value).lstrip('0x').rstrip('L').zfill(width)

    upper_bounds = path_lengths[1:]
    path_index = 0
    while position > upper_bounds[path_index]:
        path_index += 1

    offset_in_path = position - path_lengths[path_index]
    encoded = _hex_digits(path_index, 3) + _hex_digits(offset_in_path, 4)
    return int(encoded, 16)


########################################################################################################################
# Read constants
# Number of retries handed to the output collection below; read from the
# 'num-retries' job parameter.
NUM_RETRIES = int(arvados.getjobparam('num-retries'))
# Validate with an explicit raise rather than `assert`: assertions are stripped
# when Python runs with -O, which would let a non-positive value through.
if NUM_RETRIES <= 0:
    raise ValueError("'num-retries' must be strictly positive")

# The 'antigen-type' job parameter, coerced to a string.
antigen_type = str(arvados.getjobparam('antigen-type'))
########################################################################################################################
#Set-up collection and logging file to write out to
out = arvados.collection.Collection(num_retries=NUM_RETRIES)
# Three files inside the output collection: a timing log, a general log, and a
# Python file named classifier_variables.py.
time_logging_fh = out.open('time_log.txt', 'w')
info_fh = out.open('log.txt', 'w')
variable_fh = out.open('classifier_variables.py', 'w')
########################################################################################################################
# Load settings
# The 'settings' job parameter is resolved to a path and loaded as a Python
# module named 'settings'; t0/t1 bracket the load so its wall-clock duration
# can be logged.
t0 = time.time()
settings = imp.load_source('settings', arvados.get_job_param_mount('settings'))
t1 = time.time()
time_logging_fh.write('Loading settings %fs\n' % (t1 - t0))