def insert_pause(samples, insert_pause_idx, move, pause_type):
    if insert_pause_idx <= 0:
        return

    stop_sample = samples[insert_pause_idx - 1]
    start_sample = samples[insert_pause_idx]
    pause_duration = start_sample.time - stop_sample.time
    pause_distance = vincenty((radian_to_degree(stop_sample.latitude),
                               radian_to_degree(stop_sample.longitude)),
                              (radian_to_degree(start_sample.latitude),
                               radian_to_degree(start_sample.longitude))).meters

    # Introduce start of pause sample
    pause_sample = Sample()
    pause_sample.move = move
    pause_sample.utc = stop_sample.utc
    pause_sample.time = stop_sample.time
    # Shift the last recorded sample back by 1 microsecond so the new pause
    # sample can be inserted while keeping time order
    stop_sample.utc -= timedelta(microseconds=1)
    stop_sample.time -= timedelta(microseconds=1)
    pause_sample.events = {"pause": {"state": "True",
                                     "type": str(pause_type),
                                     "duration": str(pause_duration),
                                     "distance": str(pause_distance)}}
    samples.insert(insert_pause_idx, pause_sample)  # Duplicate last element

    # Introduce end of pause sample
    pause_sample = Sample()
    pause_sample.move = move
    pause_sample.utc = start_sample.utc
    pause_sample.time = start_sample.time
    # Shift the first recorded sample forward by 1 microsecond so the new
    # pause sample can be inserted while keeping time order
    start_sample.utc += timedelta(microseconds=1)
    start_sample.time += timedelta(microseconds=1)
    pause_sample.events = {"pause": {"state": "False",
                                     "duration": "0",
                                     "distance": "0",
                                     "type": str(pause_type)}}
    samples.insert(insert_pause_idx + 1, pause_sample)
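# Hedged usage sketch (mirrors the call sites in the GPX parser further below,
# the only grounded callers of this API): after extending the accumulated
# track samples with a finished segment, a pause start/stop pair is spliced in
# at the segment boundary; the 1-microsecond shifts above keep the list
# strictly time-ordered around the inserted pair.
#
#     insert_pause_idx = len(track_samples)
#     track_samples.extend(segment_samples)
#     insert_pause(track_samples, insert_pause_idx, move, pause_type=GPX_TRKSEG)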
def setUp(self):
    for delFile in os.listdir("../testFiles/output/"):
        file_path = os.path.join("../testFiles/output/", delFile)
        if os.path.isdir(file_path):
            shutil.rmtree(file_path)
        else:
            os.unlink(file_path)
    TestMapper.testPool = Pool.Pool("testPool", "../testFiles/output/")
    Program.config.setPath("refGenome", "../testFiles/input/smallRefGenome.fa")
    TestMapper.sample = Sample.Sample(TestMapper.testPool, "testLib")
    TestMapper.testPool.addSample(TestMapper.sample)
    TestMapper.mapper = Mapper.Mapper()
def setUp(self):
    for delFile in os.listdir("../testFiles/output/"):
        file_path = os.path.join("../testFiles/output/", delFile)
        if os.path.isdir(file_path):
            shutil.rmtree(file_path)
        else:
            os.unlink(file_path)
    TestDecompressor.testPool = Pool.Pool("testPool", "../testFiles/output/")
    TestDecompressor.sample = Sample.Sample(TestDecompressor.testPool, "testLib")
    TestDecompressor.testPool.addSample(TestDecompressor.sample)
    TestDecompressor.sample.setForwardFq(TestDecompressor.gzFile)
    TestDecompressor.sample.setReversedFq(TestDecompressor.refGzFile)
    TestDecompressor.decompressor = Decompressor.Decompressor()
def parse_sample(row, dayfirst_dict, source=None):
    """Parse a row into a Sample.

    Parameters
    ----------
    row : dict-like
        Object whose keys are column headings and values are the row values.
    dayfirst_dict : dict
        Dictionary whose keys are names of columns containing date/time data
        and values are booleans indicating whether the dates in that column
        should be interpreted as having the day as the first component (True)
        or a month or year as the first component (False).
    source : model.Source
        Source for the returned sample.

    Returns
    -------
    Sample
    """
    sample = Sample()
    sample.age_units = get_age_units(row, ureg.years)
    sample.age = get_age(row)
    sample.latitude = get_latitude(row)
    sample.longitude = get_longitude(row)
    sample.elevation = get_elevation(row)
    sample.height_units = get_height_units(row, ureg.metres)
    sample.height = get_height(row)
    sample.weight_units = get_weight_units(row, ureg.kilograms)
    sample.weight = get_weight(row)
    sample.bmi = get_bmi(row)
    (d, t) = get_collection_datetime(row, dayfirst_dict)
    sample.sample_date = d
    sample.sample_time = t
    sample.sampling_time = parse_sampling_time(sample)
    sample.sampling_site = parse_sampling_site(row)
    # Initialize equality attrs
    if not source:
        sample.source = parse_source(row)
    elif isinstance(source, Source):
        sample.source = source
    else:
        raise TypeError(f'Given source was not of type {type(Source())!r}.')
    sample.orig_study_id = get_study_id(row)
    sample.orig_subject_id = get_subject_id(row)
    sample.orig_sample_id = get_sample_id(row)
    return sample
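# Hedged usage sketch (assumptions: the metadata file is tab-separated like
# the QIITA-style test fixture below, and csv is available at module level):
#
#     import csv
#     with open('./data/test_data/samp_metadata/sample1.txt') as fh:
#         row = next(csv.DictReader(fh, delimiter='\t'))
#     dayfirst_dict = {'collection_date': False}  # month-first dates
#     sample = parse_sample(row, dayfirst_dict)   # source is parsed from the row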
def testHaplotyperFullPathGrid(self):
    expOutFile = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
    gzFile = "../testFiles/input/test.fq.gz"
    refGzFile = "../testFiles/input/revTest.fq.gz"
    TestHaplotyper.testPool.vcf = {}
    TestHaplotyper.sample = Sample.Sample(TestHaplotyper.testPool, "testLib")
    TestHaplotyper.testPool.addSample(TestHaplotyper.sample)
    TestHaplotyper.sample.setForwardFq(gzFile)
    TestHaplotyper.sample.setReversedFq(refGzFile)
    TestHaplotyper.sample.reversedFq.forward = False
    Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
    # createdOutFile = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
    # self.assertEqual(os.path.abspath(createdOutFile), os.path.abspath(expOutFile),
    #                  os.path.abspath(createdOutFile) + " is not " + os.path.abspath(expOutFile))
    # Check if the file contains exactly one SNP
    self.checkNoOfSnps(expOutFile)
def testSamtoolsMultiple(self):
    # Add an extra sample to the pool
    TestSnvCaller.sample2 = Sample.Sample(TestSnvCaller.testPool, "testLib2")
    TestSnvCaller.sample2.bam = BamFile.BamFile(TestSnvCaller.testPool,
                                                TestSnvCaller.sample2,
                                                TestSnvCaller.inputBam)
    TestSnvCaller.testPool.addSample(TestSnvCaller.sample2)
    # Execute and check execution output
    SamtoolsMpileup.SamtoolsMpileup(TestSnvCaller.testPool).callSnvs()
    outputFile = TestSnvCaller.testPool.vcf[None].fileName
    self.assertEqual(
        os.path.abspath(outputFile),
        os.path.abspath(TestSnvCaller.expVcfFile),
        os.path.abspath(outputFile) + " is not " + os.path.abspath(TestSnvCaller.expVcfFile))
    # Check if the file contains exactly one SNP
    self.checkNoOfSnps(TestSnvCaller.expVcfFile)
def testHaplotyperPathGrid(self):
    TestHaplotyper.testPool.vcf = {}
    TestHaplotyper.sample = Sample.Sample(TestHaplotyper.testPool, "testLib")
    TestHaplotyper.testPool.addSample(TestHaplotyper.sample)
    TestHaplotyper.sample.bam = BamFile.BamFile(TestHaplotyper.testPool,
                                                TestHaplotyper.sample,
                                                TestHaplotyper.inputBam,
                                                sortedBam=True,
                                                headerLine=True,
                                                duplicates=False,
                                                mdTag=True,
                                                index=True)
    Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
    # self.assertEqual(os.path.abspath(createdOutFile), os.path.abspath(expOutFile),
    #                  os.path.abspath(createdOutFile) + " is not " + os.path.abspath(expOutFile))
    self.checkNoOfSnps(
        "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf")
def parse_samples(samples, move):
    for sample_node in samples:
        sample = Sample()
        sample.move = move
        for child in sample_node.iterchildren():
            tag = normalize_tag(child.tag)
            value = child.text
            if tag == 'events':
                sample.events = parse_json(child)
            elif tag == 'satellites':
                sample.satellites = parse_json(child)
            elif tag == 'apps_data':
                sample.apps_data = parse_json(child)
            else:
                set_attr(sample, tag, value)
        yield sample
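# Hedged usage sketch: 'samples' can be any iterable of lxml element nodes
# whose child tags map onto Sample attributes; 'events', 'satellites' and
# 'apps_data' children carry JSON payloads, everything else is assigned
# verbatim through set_attr. The node tag and the db.session persistence
# below are assumptions, not taken from this module:
#
#     for sample in parse_samples(tree.iterchildren(tag='sample'), move):
#         db.session.add(sample)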
def parse_sample(row, attr_map, site_attrs_map=None, time_attr=None):
    """Parse a row of an indexable collection into a Sample.

    Parameters
    ----------
    row : indexable collection
        A row to be parsed into a Sample.
    attr_map : dict
        A dictionary mapping indexes of the indexable collection `row` to
        attribute names of the Sample object.
    site_attrs_map : dict, optional
        Mapping passed to parse_sample_site to build the sampling_site.
    time_attr : optional
        Identifies the sampling time data in `row`; passed to parse_sample_time.
    """
    sample = Sample()
    for index, attr in attr_map.items():
        setattr(sample, attr, row[index])
    if site_attrs_map:
        sample.sampling_site = parse_sample_site(row, site_attrs_map)
    if time_attr:
        sample.sampling_time = parse_sample_time(row, time_attr)
    return sample
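# Hedged usage sketch (the map and row below are illustrative only, not from
# the original module): attr_map keys index into the row, values name Sample
# attributes.
#
#     attr_map = {0: 'orig_sample_id', 1: 'age', 2: 'latitude', 3: 'longitude'}
#     sample = parse_sample(('317.F10', 22.0, 40.0, -105.0), attr_map)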
def setUp(self):
    for handler in logging.getLogger().handlers:
        handler.close()
    for delFile in os.listdir("../testFiles/output/"):
        file_path = os.path.join("../testFiles/output/", delFile)
        if os.path.isdir(file_path):
            shutil.rmtree(file_path)
        else:
            os.unlink(file_path)
    TestSnvCaller.testPool = Pool.Pool("testPool", "../testFiles/output/")
    Program.config.setPath("refGenome", "../testFiles/input/smallRefGenome.fa")
    TestSnvCaller.sample = Sample.Sample(TestSnvCaller.testPool, "testLib")
    TestSnvCaller.testPool.addSample(TestSnvCaller.sample)
    TestSnvCaller.sample.bam = BamFile.BamFile(TestSnvCaller.testPool,
                                               TestSnvCaller.sample,
                                               TestSnvCaller.inputBam,
                                               sortedBam=True,
                                               headerLine=True,
                                               duplicates=False,
                                               mdTag=True,
                                               index=True)
def run_test(input_images, value=None):
    # Initialize test debugger
    if DEBUG:
        if os.path.exists(constants.TEST_DEBUG_PATH):
            shutil.rmtree(constants.TEST_DEBUG_PATH)
        os.mkdir(constants.TEST_DEBUG_PATH)

    # If the test program was run with no arguments, every image inside the
    # 'input' folder is tested
    if input_images == []:
        input_images = os.listdir(INPUT_IMAGES_PATH)

    # Choose algorithm
    algorithm = algorithms.FreeComet(False, False)
    # algorithm = algorithms.OpenComet()

    comet_statistics_list = []
    head_statistics_list = []
    for image_name in input_images:
        print("Input Image: " + image_name + "\n")

        # [1] Get input image
        input_image_path = os.path.join(INPUT_IMAGES_PATH, image_name)
        grayscale_input_image, original_input_image = utils.read_image(input_image_path, True)
        if DEBUG:
            path = __create_debug_path(image_name)
            utils.save_image(original_input_image, path)

        # [2] Execute algorithm. Returns a list of lists of contours
        sample = Sample(image_name, original_input_image)
        comets_contours_list = algorithm.execute(sample, value)
        # Build Comet objects
        comet_list = __build_comets(comets_contours_list, sample)

        # [3] Get algorithm execution output masks
        heads_output_mask = numpy.zeros(shape=grayscale_input_image.shape, dtype=numpy.uint8)
        comets_output_mask = numpy.zeros(shape=grayscale_input_image.shape, dtype=numpy.uint8)
        for comet in comet_list:
            if comet._get_comet_contour() is not None:
                utils.draw_contours(comets_output_mask, [comet._get_comet_contour()])
            else:
                utils.draw_contours(comets_output_mask, [comet._get_head_contour()])
            utils.draw_contours(heads_output_mask, [comet._get_head_contour()])
        utils.save_image(comets_output_mask, "1.png")
        utils.save_image(heads_output_mask, "2.png")

        # [4] Get expected output masks (the input image has to be manually
        # segmented beforehand)
        # Expected output heads mask
        _, expected_heads_image = utils.read_image(
            os.path.join(OUTPUT_HEADS_PATH, image_name), True)
        expected_heads_mask = utils.get_red_from_image(expected_heads_image)
        expected_heads_mask = utils.to_binary_image(
            utils.to_gray_image(expected_heads_mask), 1)
        # Expected output comets mask
        _, expected_comets_image = utils.read_image(
            os.path.join(OUTPUT_COMETS_PATH, image_name), True)
        expected_comets_mask = utils.get_red_from_image(expected_comets_image)
        expected_comets_mask = utils.to_binary_image(
            utils.to_gray_image(expected_comets_mask), 1)

        # [5] Input image test results
        comet_statistics = __intersection_over_union(comets_output_mask, expected_comets_mask,
                                                     original_input_image, "COMETS TEST",
                                                     "Comets", image_name)
        comet_statistics_list.append(comet_statistics)
        head_statistics = __intersection_over_union(heads_output_mask, expected_heads_mask,
                                                    original_input_image, "HEADS TEST",
                                                    "Heads", image_name)
        head_statistics_list.append(head_statistics)

    # Final average test results
    return __test_results(comet_statistics_list, head_statistics_list)
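# Hedged usage sketch: an empty list tests every image found in the input
# folder; a non-empty list restricts the run to the named files (the file
# name and value below are hypothetical).
#
#     results = run_test([])                      # test all images in 'input'
#     results = run_test(['comet_01.png'], 0.5)   # single image, explicit value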
def parse_sample(row):
    sample = Sample()
    return sample
class SampleParserTest(unittest.TestCase):
    sample_test_file = './data/test_data/samp_metadata/sample1.txt'

    row = OrderedDict([
        ('sample_name', '317.F10'),
        ('age', '22'),
        ('age_unit', 'years'),
        ('altitude', '0'),
        ('anatomical_body_site', 'FMA:Palm'),
        ('anonymized_name', 'F10'),
        ('body_habitat', 'UBERON:skin'),
        ('body_product', 'UBERON:sebum'),
        ('body_site', 'UBERON:zone of skin of hand'),
        ('collection_date', '11/12/2006'),
        ('country', 'GAZ:United States of America'),
        ('depth', '0'),
        ('description', 'human skin metagenome'),
        ('dna_extracted', 'true'),
        ('dominant_hand', ''),
        ('elevation', '1591.99'),
        ('env_biome', 'ENVO:human-associated habitat'),
        ('env_feature', 'ENVO:human-associated habitat'),
        ('host_common_name', 'human'),
        ('host_subject_id', 'F1'),
        ('host_taxid', '9606'),
        ('latitude', '40'),
        ('longitude', '-105'),
        ('palm_size', ''),
        ('physical_specimen_remaining', 'false'),
        ('public', 'true'),
        ('qiita_study_id', '317'),
        ('sample_type', 'XXQIITAXX'),
        ('sex', 'female'),
        ('time_since_last_wash', '0'),
        ('title', 'The influence of sex handedness and washing on the diversity of hand surface bacteriaS1_V160'),
    ])
    dayfirst_dict = {'collection_date': False}

    # TODO Update details of source (when necessary)
    source1 = Source(name='qiita',
                     type_='Database (Public)',
                     url='https://qiita.ucsd.edu/study/description/0')
    experiment1 = Experiment(source=source1, orig_study_id='317')
    subject1 = Subject(source=source1,
                       orig_study_id='317',
                       orig_subject_id='F1',
                       sex='female',
                       country='United States of America',
                       race=None,
                       csection=None,
                       disease=None,
                       dob=None)
    subject2 = Subject(source=source1,
                       orig_study_id='317',
                       orig_subject_id='F2',
                       sex='female',
                       country='United States of America',
                       race=None,
                       csection=None,
                       disease=None,
                       dob=None)
    sampling_site = SamplingSite(
        uberon_habitat_term='UBERON:skin',
        uberon_product_term='UBERON:sebum',
        uberon_site_term='UBERON:zone of skin of hand',
        env_biom_term='ENVO:human-associated habitat',
        env_feature_term='ENVO:human-associated habitat')
    sampling_time = Time(timestamp=datetime.datetime(2006, 11, 12),
                         uncertainty=None,
                         date=datetime.date(2006, 11, 12),
                         time=None,
                         year=2006,
                         month=11,
                         day=12,
                         hour=None,
                         minute=None,
                         second=None,
                         season='autumn')
    sample1 = Sample(source=source1,
                     orig_study_id='317',
                     orig_subject_id='F1',
                     orig_sample_id='317.F10',
                     age_units=ureg.years,
                     age=22.0,
                     latitude=40.0,
                     longitude=-105.0,
                     elevation=1591.99,
                     height_units=ureg.metres,
                     height=None,
                     weight_units=ureg.kilograms,
                     weight=None,
                     bmi=None,
                     sample_date=datetime.date(2006, 11, 12),
                     sample_time=None,
                     sampling_site=sampling_site,
                     sampling_time=sampling_time)
    sample2 = Sample(source=source1,
                     orig_study_id='317',
                     orig_subject_id='F1',
                     orig_sample_id='317.F12',
                     age_units=ureg.years,
                     age=22.0,
                     latitude=40.0,
                     longitude=-105.0,
                     elevation=1591.99,
                     height_units=ureg.metres,
                     height=None,
                     weight_units=ureg.kilograms,
                     weight=None,
                     bmi=None,
                     sample_date=datetime.date(2006, 11, 12),
                     sample_time=None,
                     sampling_site=sampling_site,
                     sampling_time=sampling_time)
    sample3 = Sample(source=source1,
                     orig_study_id='317',
                     orig_subject_id='F2',
                     orig_sample_id='317.F20',
                     age_units=ureg.years,
                     age=None,
                     latitude=40.0,
                     longitude=-105.0,
                     elevation=1591.99,
                     height_units=ureg.metres,
                     height=None,
                     weight_units=ureg.kilograms,
                     weight=None,
                     bmi=None,
                     sample_date=datetime.date(2006, 11, 12),
                     sample_time=None,
                     sampling_site=sampling_site,
                     sampling_time=sampling_time)

    # Not necessary to establish these relationships for the purpose of
    # test_parse_objects:
    sample1._subject = subject1
    sample2._subject = subject1
    sample3._subject = subject2
    subject1._samples = {sample1, sample2}
    subject2._samples = {sample3}
    experiment1._subjects = {subject1, subject2}
    experiment1._samples = {sample1, sample2, sample3}

    def test_parse_objects(self):
        experiment_ids = parse_objects(self.sample_test_file)
        self.assertIn('317', experiment_ids)
        experiment = experiment_ids['317']
        self.assertEqual(self.experiment1, experiment)
        self.assertIn(self.subject1, experiment.subjects)
        self.assertIn(self.subject2, experiment.subjects)
        self.assertIn(self.sample1, experiment.samples)
        self.assertIn(self.sample2, experiment.samples)
        self.assertIn(self.sample3, experiment.samples)

    # TODO: We will have to test without the source keyword at some point.
    def test_parse_sample(self):
        self.maxDiff = None
        blacklist_attrs = ['_sa_instance_state', 'source', 'counts',
                           '_experiments', '_subject', '_preparations']
        sample = parse_sample(self.row, self.dayfirst_dict, source=self.source1)
        sample_attrs = set((key, value)
                           for key, value in sample.__dict__.items()
                           if key not in blacklist_attrs)
        expected_attrs = set((key, value)
                             for key, value in self.sample1.__dict__.items()
                             if key not in blacklist_attrs)
        self.assertEqual(sample_attrs, expected_attrs)
        self.assertEqual(sample.source, self.source1)
        self.assertEqual(sample.counts, self.sample1.counts)
        # When a sample is parsed, it is not yet associated with subject/experiments
        self.assertEqual(sample._subject, None)
        self.assertEqual(sample._experiments, set())
        self.assertEqual(sample._preparations, set())

    def test_parse_subject(self):
        self.maxDiff = None
        blacklist_attrs = ['_sa_instance_state', 'source', 'counts',
                           'perturbation_facts', '_experiments', '_samples',
                           '_perturbations']
        subject = parse_subject(self.row, source=self.source1)
        subject_attrs = set((key, value)
                            for key, value in subject.__dict__.items()
                            if key not in blacklist_attrs)
        expected_attrs = set((key, value)
                             for key, value in self.subject1.__dict__.items()
                             if key not in blacklist_attrs)
        self.assertEqual(subject_attrs, expected_attrs)
        self.assertEqual(subject.source, self.source1)
        self.assertEqual(subject.counts, self.subject1.counts)
        self.assertEqual(subject.perturbation_facts, self.subject1.perturbation_facts)
        # When a subject is parsed, it is not yet associated with samples/experiments
        self.assertEqual(subject._experiments, set())
        self.assertEqual(subject._samples, set())
        self.assertEqual(subject._perturbations, set())

    def test_parse_processing(self):
        self.maxDiff = None
        processing1 = Processing(parent=None,
                                 parameter_values='{}',
                                 orig_prep_id='577',
                                 orig_proc_id='2593')
        processing2 = Processing(parent=processing1,
                                 parameter_values='{'
                                 '"barcode_type":"golay_12",'
                                 '"command":"Split libraries (QIIMEq2 1.9.1)",'
                                 '"disable_bc_correction":"False",'
                                 '"disable_primers":"False",'
                                 '"generated on":"2016-01-14 17:01",'
                                 '"input_data":"2593",'
                                 '"max_ambig":"6",'
                                 '"max_barcode_errors":"1.5",'
                                 '"max_homopolymer":"6",'
                                 '"max_primer_mismatch":"0",'
                                 '"max_seq_len":"1000",'
                                 '"min_qual_score":"25",'
                                 '"min_seq_len":"200",'
                                 '"qual_score_window":"0",'
                                 '"reverse_primer_mismatches":"0",'
                                 '"reverse_primers":"disable",'
                                 '"trim_seq_length":"False",'
                                 '"truncate_ambi_bases":"False"'
                                 '}',
                                 orig_prep_id='577',
                                 orig_proc_id='310')
        processing3 = Processing(
            parent=processing2,
            parameter_values='{'
            '"command":"Pick closed-reference OTUs (QIIMEq2 1.9.1)",'
            '"generated on":"2015-06-30 14:06",'
            '"input_data":"310",'
            '"reference-seq":"/databases/gg/13_8/rep_set/97_otus.fasta",'
            '"reference-tax":"/databases/gg/13_8/taxonomy/97_otu_taxonomy.txt",'
            '"similarity":"0.97",'
            '"sortmerna_coverage":"0.97",'
            '"sortmerna_e_value":"1",'
            '"sortmerna_max_pos":"10000",'
            '"threads":"1"'
            '}',
            orig_prep_id='577',
            orig_proc_id='2594')
        expected_processings = {'2593': processing1,
                                '310': processing2,
                                '2594': processing3}
        processings = parse_processings('./data/test_data/proc1.json')
        # TODO: Implement workflows and parents as mocks?
        blacklist_attrs = ['_sa_instance_state', 'workflows', 'parent']
        for proc_id, processing in processings.items():
            self.assertIn(proc_id, expected_processings)
            processing_attrs = set(
                (key, value) for key, value in processing.__dict__.items()
                if key not in blacklist_attrs)
            expected_attrs = set(
                (key, value)
                for key, value in expected_processings[proc_id].__dict__.items()
                if key not in blacklist_attrs)
            self.assertEqual(processing_attrs, expected_attrs)
def parse_samples(tree, move, gpx_namespace, import_options):
    all_samples = []

    tracks = tree.iterchildren(tag=gpx_namespace + GPX_TRK)
    for track in tracks:
        track_samples = []

        track_segments = track.iterchildren(tag=gpx_namespace + GPX_TRKSEG)
        for track_segment in track_segments:
            segment_samples = []

            track_points = track_segment.iterchildren(tag=gpx_namespace + GPX_TRKPT)
            for track_point in track_points:
                sample = Sample()
                sample.move = move

                # GPS position / altitude
                sample.latitude = degree_to_radian(float(track_point.attrib[GPX_TRKPT_ATTRIB_LATITUDE]))
                sample.longitude = degree_to_radian(float(track_point.attrib[GPX_TRKPT_ATTRIB_LONGITUDE]))
                sample.sample_type = GPX_SAMPLE_TYPE
                if hasattr(track_point, GPX_TRKPT_ATTRIB_ELEVATION):
                    sample.gps_altitude = float(track_point.ele)
                    sample.altitude = int(round(sample.gps_altitude))

                # Time / UTC
                sample.utc = dateutil.parser.parse(str(track_point.time))

                # Option flags
                pause_detected = False

                # Track segment samples
                if len(segment_samples) > 0:
                    # Accumulate time delta to the previous sample to get the total duration
                    time_delta = sample.utc - segment_samples[-1].utc
                    sample.time = segment_samples[-1].time + time_delta

                    # Accumulate distance to the previous sample
                    distance_delta = vincenty((radian_to_degree(sample.latitude),
                                               radian_to_degree(sample.longitude)),
                                              (radian_to_degree(segment_samples[-1].latitude),
                                               radian_to_degree(segment_samples[-1].longitude))).meters
                    sample.distance = segment_samples[-1].distance + distance_delta
                    if time_delta > timedelta(0):
                        sample.speed = distance_delta / time_delta.total_seconds()
                    else:
                        sample.speed = 0

                    # Option: pause detection based on a time delta threshold
                    if (GPX_IMPORT_OPTION_PAUSE_DETECTION in import_options
                            and time_delta > import_options[GPX_IMPORT_OPTION_PAUSE_DETECTION]):
                        pause_detected = True
                        sample.distance = segment_samples[-1].distance
                        sample.speed = 0

                # Track segment -> track (multiple track segments contained)
                elif len(track_samples) > 0:
                    # Time diff to the last sample of the previous track segment
                    sample.time = track_samples[-1].time + (sample.utc - track_samples[-1].utc)
                    sample.distance = track_samples[-1].distance
                    sample.speed = 0

                # Track -> full GPX (multiple tracks contained)
                elif len(all_samples) > 0:
                    # Time diff to the last sample of the previous track
                    sample.time = all_samples[-1].time + (sample.utc - all_samples[-1].utc)
                    sample.distance = all_samples[-1].distance
                    sample.speed = 0

                # First sample
                else:
                    sample.time = timedelta(0)
                    sample.distance = 0
                    sample.speed = 0

                parse_sample_extensions(sample, track_point)
                segment_samples.append(sample)

                # Finally insert a detected pause based on the time delta threshold
                if pause_detected:
                    insert_pause(segment_samples, len(segment_samples) - 1, move,
                                 pause_type=GPX_IMPORT_PAUSE_TYPE_PAUSE_DETECTION)
            # end for track_points

            # Insert a pause event between consecutive track segments
            insert_pause_idx = len(track_samples)
            track_samples.extend(segment_samples)
            insert_pause(track_samples, insert_pause_idx, move, pause_type=GPX_TRKSEG)
        # end for track_segments

        # Insert a pause event between consecutive tracks
        insert_pause_idx = len(all_samples)
        all_samples.extend(track_samples)
        insert_pause(all_samples, insert_pause_idx, move, pause_type=GPX_TRK)
    # end for tracks

    return all_samples
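# Hedged usage sketch: pause detection is opt-in; the option value is the
# maximum time delta allowed between consecutive track points before a pause
# pair is inserted (the 5-minute threshold below is an arbitrary example):
#
#     import_options = {GPX_IMPORT_OPTION_PAUSE_DETECTION: timedelta(minutes=5)}
#     samples = parse_samples(tree, move, gpx_namespace, import_options)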
def strava_import(current_user, activity_id):
    client = get_strava_client(current_user)
    activity = client.get_activity(activity_id=activity_id)
    stream_types = ['time', 'distance', 'latlng', 'temp', 'heartrate',
                    'velocity_smooth', 'altitude']
    streams = client.get_activity_streams(activity_id, types=stream_types)

    activity_string = map_type(activity.type)
    result = db.session.query(Move.activity_type).filter(Move.activity == activity_string).first()
    if result:
        activity_type, = result
    else:
        activity_type = None

    device = find_device(current_user)

    move = Move()
    move.user = current_user
    move.duration = activity.elapsed_time
    move.ascent = float(activity.total_elevation_gain)
    move.speed_avg = float(activity.average_speed)
    move.hr_avg = heart_rate(activity.average_heartrate)
    move.temperature_avg = celcius_to_kelvin(activity.average_temp)
    move.device = device
    move.date_time = activity.start_date_local
    move.activity = activity_string
    move.activity_type = activity_type
    move.distance = float(activity.distance)
    move.import_date_time = datetime.now()
    move.import_module = __name__
    move.strava_activity_id = activity_id
    move.public = False
    move.source = "Strava activity id=%d; external_id='%s'" % (activity_id, activity.external_id)

    if streams:
        # The returned streams are parallel arrays: index i addresses the same
        # instant in each of them, so all streams must have the same length
        lengths = set([len(streams[stream].data) for stream in streams])
        assert len(lengths) == 1
        length, = lengths
    else:
        length = 0

    move.speed_max = move.speed_avg

    all_samples = []
    for i in range(0, length):
        time = timedelta(seconds=streams['time'].data[i])
        distance = float(streams['distance'].data[i])
        if 'heartrate' in streams:
            hr = float(streams['heartrate'].data[i])
        else:
            hr = None
        if 'latlng' in streams:
            lat, lng = streams['latlng'].data[i]
        else:
            lat = None
            lng = None
        if 'altitude' in streams:
            altitude = float(streams['altitude'].data[i])
        else:
            altitude = None
        if 'velocity_smooth' in streams:
            speed = float(streams['velocity_smooth'].data[i])
        else:
            speed = None
        if 'temp' in streams:
            temperature = celcius_to_kelvin(streams['temp'].data[i])
        else:
            temperature = None

        sample = Sample()
        sample.sample_type = SAMPLE_TYPE
        sample.move = move
        sample.time = time
        sample.utc = (activity.start_date + time).replace(tzinfo=None)
        sample.distance = distance
        sample.latitude = degree_to_radian(lat)
        sample.longitude = degree_to_radian(lng)
        sample.hr = heart_rate(hr)
        sample.temperature = temperature
        sample.speed = speed
        sample.altitude = altitude
        if speed is not None:
            # Guard against activities without a velocity stream, where speed
            # is None and max() would raise a TypeError
            move.speed_max = max(move.speed_max, speed)
        all_samples.append(sample)

    derive_move_infos_from_samples(move, all_samples)

    db.session.add(move)
    db.session.flush()
    postprocess_move(move)
    db.session.commit()
    return move
def findFastqFiles(self, directory, inFormat):
    """The method findFastqFiles finds all fastq files recursively in a
    directory; from each directory with fastq files a sample is created.

    :param directory: the directory where the user hid his fastq files
    :type directory: str -- path to the directory
    :param inFormat: input format, one of "bam", "fq" or "vcf"
    :type inFormat: str
    """
    fastqFiles = []
    for fileName in os.listdir(directory):
        fileName = directory + "/" + fileName
        if os.path.isdir(fileName):
            self.findFastqFiles(fileName, inFormat)
        else:
            if inFormat == "bam":
                if fileName.endswith(".bam") or fileName.endswith(".bam.gz"):
                    newSamp = Sample.Sample(self.pool,
                                            os.path.basename(os.path.splitext(fileName)[0]))
                    self.samples.append(newSamp)
                    newSamp.bam = BamFile.BamFile(self.pool, newSamp, fileName,
                                                  sortedBam=True, headerLine=True,
                                                  duplicates=False, mdTag=True,
                                                  index=True)
                    self.pool.addSample(newSamp)
            elif inFormat == "fq":
                if fileName.endswith(".fq") or fileName.endswith(".fq.gz"):
                    fastqFiles.append(fileName)
            elif inFormat == "vcf":
                if fileName.endswith(".vcf") or fileName.endswith(".vcf.gz"):
                    if len(os.listdir(directory)) == 1:
                        chrom = None
                    else:
                        chrom = self.getChromosomeFromVcf(fileName)
                    self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool, fileName,
                                                           bcf=False, filtered=True,
                                                           phased=True, chrom=chrom)
                elif fileName.endswith(".bcf") or fileName.endswith(".bcf.gz"):
                    chrom = Tools.getChromosomeOfFile(Program.config.getPath("refGenome"),
                                                      fileName)
                    self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool, fileName,
                                                           bcf=True, filtered=True,
                                                           phased=True, chrom=chrom)
    if inFormat == "bam" or inFormat == "vcf":
        return

    if len(fastqFiles) > 0:
        # Create a library name from the file name; note the elif, so a name
        # already stripped of "_1.fq"/"_2.fq" is not truncated a second time
        libName = os.path.basename(fastqFiles[0])
        if libName.endswith("_1.fq") or libName.endswith("_2.fq"):
            libName = libName[:-5]
        elif libName.endswith("_1.fq.gz") or libName.endswith("_2.fq.gz"):
            libName = libName[:-8]
        else:
            libName = libName[:-3]

        # Create the sample
        sample = Sample.Sample(self.pool, libName)
        self.pool.addSample(sample)

        # Add the fastq files to the sample
        if len(fastqFiles) == 1:
            sample.setForwardFq(fastqFiles[0])
        elif len(fastqFiles) == 2:
            sample.setForwardFq(fastqFiles[0])
            sample.setReversedFq(fastqFiles[1])
        elif len(fastqFiles) > 2:
            if fastqFiles[0].endswith("_1.fq"):
                suffix = "_1.fq"
            elif fastqFiles[0].endswith("_1.fq.gz"):
                suffix = "_1.fq.gz"
            else:
                print("WARNING: files do not end with _1.fq or _1.fq.gz or _2.fq "
                      "or _2.fq.gz, using all files in one directory as 1 sample "
                      "with only forward reads")
                suffix = fastqFiles[0][-3:]
            # Create a list of forward fastq files and one of reversed fastq files
            forward = []
            reversedFastq = []
            for fastqFile in fastqFiles:
                if fastqFile.endswith(suffix):
                    forward.append(fastqFile)
                else:
                    reversedFastq.append(fastqFile)
            # Convert paths to FastqFile objects
            for i in range(len(forward)):
                forward[i] = FastqFile.FastqFile(self.pool, sample, forward[i])
            for i in range(len(reversedFastq)):
                reversedFastq[i] = FastqFile.FastqFile(self.pool, sample,
                                                       reversedFastq[i], forward=False)
            # Add the fastq files to the sample
            sample.forwardFq = forward
            sample.reversedFq = reversedFastq
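# Hedged usage sketch ('finder' stands in for whatever object owns this method
# and its 'pool'/'samples' attributes): each directory containing
# <lib>_1.fq(.gz)/<lib>_2.fq(.gz) pairs becomes one paired-end sample.
#
#     finder.findFastqFiles("../testFiles/input", "fq")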
def main():
    # Path to config file
    _, config_path = deepcopy(sys.argv)

    with open(config_path, "r") as f:
        config = json.load(f)

    data_dir = config["data_dir"]
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)
    specs = config["specifications"]

    with open(os.path.join(data_dir, "config_copy.json"), 'w', newline='') as f:
        f.write(json.dumps(config))

    vol_dat = []
    run_num = 0
    for run_vals in specs:
        concentrations = run_vals["concentrations"]
        sample_volume = run_vals["sample_volume"]
        sample_diffusive_const = run_vals["sample_diffusive_const"]
        num_timesteps = run_vals["Number of timesteps"]
        molecular_radius = run_vals["Molecular Radius"]
        min_droplet_vol = run_vals["Min Droplet Volume"]

        num_droplets = []
        means = []
        for c in concentrations:
            print("Started run for concentration " + str(c) + "uM", flush=True)
            sample = Sample(sample_volume, sample_diffusive_const, c,
                            molecular_radius, min_droplet_vol)
            aggs = sample.simulate(num_timesteps)
            volumes = []
            for agg in aggs:
                if agg.is_droplet():
                    volumes.append(agg.volume())
            vol_dat += [volumes]
            if len(volumes) == 0:
                num_droplets.append(0)
                means.append(0)
            else:
                nobs, minmax, mean, variance, skewness, kurtosis = sp.describe(volumes)
                num_droplets.append(nobs)
                means.append(mean)
            print("Finished run for concentration " + str(c) + "uM", flush=True)

        run_num += 1
        run_dir = os.path.join(data_dir, "run" + str(run_num))
        os.mkdir(run_dir)

        with open(os.path.join(run_dir, "volumes_dat.csv"), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            writer.writerows(vol_dat)

        plt.rc('font', family='serif')
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(1, 1, 1)
        for item in (ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(30)
        ax.plot(concentrations, num_droplets, "sb:")
        ax.set_title("Number of Droplets")
        ax.set_xlabel("Concentration (uM)")
        ax.set_ylabel("Number of Droplets")
        plt.savefig(os.path.join(run_dir, 'num_droplets.png'), bbox_inches='tight')

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(1, 1, 1)
        for item in (ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(20)
        for item in [ax.title, ax.xaxis.label, ax.yaxis.label]:
            item.set_fontsize(30)
        ax.plot(concentrations, means, "sb:")
        ax.set_title("Mean Droplet Volume")
        ax.set_xlabel("Concentration (uM)")
        ax.set_ylabel("Mean Droplet Volume (um^3)")
        plt.savefig(os.path.join(run_dir, 'mean_droplet_volume.png'), bbox_inches='tight')