def get_locations():
    """Return every location known to the pipeline as a JSON response.

    Returns:
        The object produced by ``jsonify`` for ``{'data': locations}``,
        where ``locations`` is whatever ``Pipeline().get_locations()``
        returns.
    """
    locations_list = Pipeline().get_locations()
    # Serialize once and reuse the result; the original built the response
    # twice, the first time only to print it.
    response = jsonify({'data': locations_list})
    # Debug trace kept from the original, written in the call form that is
    # valid on both Python 2 and Python 3.
    print(response)
    return response
def get_cluster(location_id):
    """Return the cluster graph for ``location_id`` as a JSON response.

    The payload carries two keys, ``nodes`` and ``links``, filled from the
    pair returned by ``Pipeline.get_cluster``.
    """
    nodes, links = Pipeline().get_cluster(location_id)
    return jsonify({'nodes': nodes, 'links': links})
def get_dates(location_id):
    """Return the dates available for ``location_id`` as a JSON response.

    The payload is ``{'data': dates}`` where ``dates`` comes from
    ``Pipeline.get_dates_location``. The payload is also printed to stdout.
    """
    payload = {'data': Pipeline().get_dates_location(location_id)}
    print(payload)
    return jsonify(payload)
def get_tweets():
    """Return tweets for the requested trend and entity as a JSON response.

    Reads the ``trend`` and ``entity`` query parameters from the current
    request; a parameter that is absent is passed on as ``None``.
    """
    query = request.args
    trend = query.get('trend')
    entity = query.get('entity')
    tweets = Pipeline().get_tweets(trend, entity)
    return jsonify({'data': tweets})
def get_tfidf():
    """Return TF-IDF data for a location and trend as a JSON response.

    Reads the ``locationid`` and ``trend`` query parameters from the
    current request and forwards them to ``Pipeline.get_tfidf``.
    """
    query = request.args
    location_id = query.get('locationid')
    trend = query.get('trend')
    scores = Pipeline().get_tfidf(location_id, trend)
    return jsonify({'data': scores})
def get_trends():
    """Return trends for a location within a date range as a JSON response.

    Reads the ``locationid``, ``min_date`` and ``max_date`` query parameters
    from the current request and forwards them, unparsed, to
    ``Pipeline.get_trends``.
    """
    query = request.args
    location_id = query.get('locationid')
    min_date = query.get('min_date')
    max_date = query.get('max_date')
    trends = Pipeline().get_trends(location_id, min_date, max_date)
    return jsonify({'data': trends})
def main():
    """Run a replay pipeline fed by generated events with trouble injected.

    The pipeline is assembled from an EventGenerator, a TroubleInjector and
    a TroubleReplayer bound to the device returned by ``dm.getDevice()``,
    then executed. Progress is reported on stdout.
    """
    print("Replay started")

    replay_pipeline = Pipeline()
    replay_pipeline.addStep(EventGenerator(interval=3000, number=1000))
    replay_pipeline.addStep(TroubleInjector())

    # The device is looked up only after the first two steps are registered,
    # matching the original ordering.
    device = dm.getDevice()
    replay_pipeline.addStep(TroubleReplayer(device))

    replay_pipeline.execute()
    print("Replay finished")
def buildPipelines(pipes, config):
    """Generate pipelines.

    Args:
        pipes: iterable of pipeline names.
        config: container indexed by pipeline name that yields the
            constructor arguments for that pipeline. A name that cannot be
            looked up gets no arguments.

    Returns:
        dict mapping each name in ``pipes`` to its created Pipeline.
    """
    pipesDict = {}
    for pipe in pipes:
        print(config)
        try:
            parms = config[pipe]
        except (KeyError, IndexError, TypeError):
            # Only a failed lookup falls back to "no arguments"; the original
            # bare except also swallowed KeyboardInterrupt and SystemExit.
            parms = {}
        print(parms)
        # NOTE(review): *parms unpacks positionally, so a dict here passes
        # its KEYS as arguments. If config values are keyword mappings this
        # should be **parms -- confirm against Pipeline's signature.
        x = Pipeline(*parms)
        x.create()
        pipesDict[pipe] = x
    return pipesDict
def pipeline_from_config_file(config_file):
    """
    Create a Pipeline instance from a ConfigObj/INI configuration file
    `config_file` which specifies the Pipeline steps, data directories etc.

    Returns the configured Pipeline.
    """
    # If a spec file exists for Pipeline, parsing also validates parameters
    # and input/output keys against it; otherwise parsing proceeds without
    # validation.
    spec_file = utilities.find_spec_file(Pipeline)
    pipeline_cfg = config_parser.loads(config_file,
                                       specfile=spec_file)['pipeline']

    # Start with a Pipeline that has no steps; they are attached below.
    pipe = Pipeline(
        name=pipeline_cfg['name'],
        system=pipeline_cfg['system'],
        log_level=pipeline_cfg.get('log_level', DEFAULT_LOG_LEVEL),
        local_logs=pipeline_cfg.get('local_log_mode', DEFAULT_LOCAL_LOGS),
    )

    # The "steps" array is the only part needing special handling: each
    # entry becomes a Step instance of the appropriate class, given its own
    # Step config file. Each entry also hints at the data the Step consumes
    # and produces. Data moves between steps in memory through a
    # Pipeline-level dictionary, the clipboard. Before a Step runs, its
    # inputs are placed in Step.inbox (a list) in the order given by that
    # Step's `inbox` parameter; after it completes, Step.outbox is read back
    # into the clipboard, assumed to be in the order given by its `outbox`
    # parameter.
    steps = []
    for step_cfg in pipeline_cfg['steps']:
        steps.append(Step.from_parsed_config(step_cfg, pipe))

    # Steps are attached only now so that each Step could pull whatever it
    # needed from its owning Pipeline while being initialized.
    pipe.configure(steps)
    return pipe
def build(config):
    """Load a YAML configuration file and create every pipeline it lists.

    Args:
        config: path to a YAML file whose top level provides ``pipes`` (the
            pipeline names) and ``pipeConfig`` (constructor arguments
            indexed by pipeline name).

    Returns:
        list of created Pipeline objects, in the order of ``pipes``.
    """
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle that the original leaked.
    with open(config, 'r') as stream:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects and is rejected by recent PyYAML
        # releases. Switch to yaml.safe_load if the config files do not rely
        # on Python-specific tags.
        settings = yaml.load(stream)

    pipes = settings.get('pipes')
    pipeConfig = settings.get('pipeConfig')

    pipesList = []
    for pipe in pipes:
        try:
            parms = pipeConfig[pipe]
        except (KeyError, IndexError, TypeError):
            # Missing entry (or no pipeConfig section at all): build the
            # pipeline without arguments. The original bare except also
            # swallowed KeyboardInterrupt and SystemExit.
            parms = {}
        # NOTE(review): *parms passes a dict's KEYS positionally; confirm
        # whether **parms was intended.
        x = Pipeline(*parms)
        x.create()
        pipesList.append(x)
    print(pipesList)
    return pipesList
def main():
    """Train the model described by an experiment configuration file.

    The experiment name is taken from the command line and resolved under
    ``./experiments``. The function sets up the batch renderer, the model,
    the optimizer and learning-rate schedule, the encoder pipeline and the
    synthetic training data, then runs epochs until the global ``epoch``
    reaches ``Training/NUM_ITER``. After every epoch the loss and learning
    rate are appended to CSV files in the output directory and the loss
    plot is regenerated.

    Rebinds the module-level names ``optimizer``, ``lr_reducer``, ``views``,
    ``epoch`` and ``pipeline``. ``epoch`` is read before it is assigned
    here, so it must already be defined at module level.
    """
    global optimizer, lr_reducer, views, epoch, pipeline

    # Read configuration file
    parser = argparse.ArgumentParser()
    parser.add_argument("experiment_name")
    arguments = parser.parse_args()

    cfg_file_path = os.path.join("./experiments", arguments.experiment_name)
    args = configparser.ConfigParser()
    args.read(cfg_file_path)

    # Prepare rotation matrices for multi view loss function
    eulerViews = json.loads(args.get('Rendering', 'VIEWS'))
    views = prepareViews(eulerViews)

    # Set the cuda device
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)

    # MODEL_PATH_LOSS is either a JSON list of paths or one bare path; a
    # bare path is not valid JSON, so json.loads raises ValueError.
    try:
        model_path_loss = json.loads(args.get('Dataset', 'MODEL_PATH_LOSS'))
    except ValueError:
        model_path_loss = [args.get('Dataset', 'MODEL_PATH_LOSS')]

    # Set up batch renderer
    br = BatchRender(model_path_loss,
                     device,
                     batch_size=args.getint('Training', 'BATCH_SIZE'),
                     faces_per_pixel=args.getint('Rendering', 'FACES_PER_PIXEL'),
                     render_method=args.get('Rendering', 'SHADER'),
                     image_size=args.getint('Rendering', 'IMAGE_SIZE'),
                     norm_verts=args.getboolean('Rendering', 'NORMALIZE_VERTICES'))

    # The pose representation used to select an output size that nothing
    # consumed; only the warning for unknown values is kept.
    pose_rep = args.get('Training', 'POSE_REPRESENTATION')
    if pose_rep not in ('6d-pose', 'quat', 'axis-angle', 'euler'):
        print("Unknown pose representation specified: ", pose_rep)

    # Initialize a model sized by the number of views
    model = Model(num_views=len(views))
    model.to(device)

    # Create an optimizer. Here we are using Adam and we pass in the
    # parameters of the model
    low_lr = args.getfloat('Training', 'LEARNING_RATE_LOW')
    high_lr = args.getfloat('Training', 'LEARNING_RATE_HIGH')
    optimizer = torch.optim.Adam(model.parameters(), lr=low_lr)
    lr_reducer = ExponentialLR(optimizer, high_lr,
                               args.getfloat('Training', 'NUM_ITER'))

    # Prepare output directories and keep a copy of the configuration there
    output_path = args.get('Training', 'OUTPUT_PATH')
    prepareDir(output_path)
    shutil.copy(cfg_file_path,
                os.path.join(output_path, cfg_file_path.split("/")[-1]))

    # Prepare pipeline
    encoder = Encoder(args.get('Dataset', 'ENCODER_WEIGHTS')).to(device)
    encoder.eval()
    pipeline = Pipeline(encoder, model, device)

    # Same list-or-single-value handling for the data model paths; the
    # translations are wrapped in a list in the single-path case.
    try:
        model_path_data = json.loads(args.get('Dataset', 'MODEL_PATH_DATA'))
        translations = np.array(json.loads(args.get('Rendering', 'T')))
    except ValueError:
        model_path_data = [args.get('Dataset', 'MODEL_PATH_DATA')]
        translations = [np.array(json.loads(args.get('Rendering', 'T')))]

    # Prepare datasets
    training_data = DatasetGenerator(args.get('Dataset', 'BACKGROUND_IMAGES'),
                                     model_path_data,
                                     translations,
                                     args.getint('Training', 'BATCH_SIZE'),
                                     "not_used",
                                     device,
                                     args.get('Training', 'VIEW_SAMPLING'))
    training_data.max_samples = args.getint('Training', 'NUM_SAMPLES')

    # Start training
    np.random.seed(seed=args.getint('Training', 'RANDOM_SEED'))
    train_loss_csv = os.path.join(output_path, "train-loss.csv")
    while epoch < args.getint('Training', 'NUM_ITER'):
        # Train on synthetic data
        model = model.train()  # Set model to train mode
        loss = runEpoch(br, training_data, model, device, output_path,
                        t=translations, config=args)
        append2file([loss], train_loss_csv)
        append2file([lr_reducer.get_lr()],
                    os.path.join(output_path, "learning-rate.csv"))

        # Plot losses
        # NOTE(review): validation_csv points at the training-loss file, as
        # in the original -- confirm that this is intended.
        plotLoss(
            train_loss_csv,
            os.path.join(output_path, "train-loss.png"),
            validation_csv=os.path.join(output_path, "train-loss.csv"),
        )
        print("-" * 20)
        print("Epoch: {0} - train loss: {1}".format(epoch, loss))
        print("-" * 20)
        epoch = epoch + 1
from Pipeline import Pipeline

# One-off script: build a Pipeline and call update_organized_tweets() on it.
pipeline_obj = Pipeline()
pipeline_obj.update_organized_tweets()
# nodes_array,links_array,cluster_id_key = pipeline_obj.get_cluster('2295420')
# print nodes_array,links_array,cluster_id_key
def main():
    """Learn the user's scrolling pace, then repeat that scroll on a device.

    A pipeline reading live getevent output feeds an AutoScrollingLearner.
    Once the pipeline has executed, the learned wait time, deltas, duration
    and point count drive a fixed number of drags centred on the display.
    """
    # capture three samples and automatically scrolls five times
    SAMPLE_COUNT = 3
    REPEAT_COUNT = 5

    print("learning the pace of scrolling")
    learn_pipeline = Pipeline()
    reader = LiveGeteventReader()
    learn_pipeline.addStep(reader)
    learn_pipeline.addStep(dtm.RawTraceParser())
    learn_pipeline.addStep(dtm.MultiTouchTypeAParser())
    learn_pipeline.addStep(dtm.RelativeTimingConverter())
    learn_pipeline.addStep(dtm.FingerDecomposer())
    learner = AutoScrollingLearner(reader, SAMPLE_COUNT)
    learn_pipeline.addStep(learner)
    learn_pipeline.execute()

    waitTime, xdelta, ydelta, duration, pointCount = learner.getSpeedAndDelta()
    print("User scrolling parameters learned")

    device = EMonkeyDevice()
    xmiddle = device.displayWidth / 2
    ymiddle = device.displayHeight / 2
    # The drag is centred on the display: half of each delta on either side.
    half_dx = xdelta / 2
    half_dy = ydelta / 2
    start = (xmiddle - half_dx, ymiddle - half_dy)
    end = (xmiddle + half_dx, ymiddle + half_dy)

    remaining = REPEAT_COUNT
    while remaining > 0:
        device.sleep(waitTime)
        device.drag(start, end, duration, pointCount)
        remaining -= 1
    print('done')
# this file tests the random forest model
from pandas import read_csv
from Pipeline import Pipeline, load_pipeline

data = read_csv("../Datasets/titanic.csv")

# Fit a fresh pipeline and predict on the converted data without the
# "Survived" column.
pipeline = Pipeline()
pipeline.fit(data)
features = pipeline.convert(data).drop("Survived", axis=1)
pred1 = pipeline.predict(features)

# Persist the pipeline, discard it, and reload it from disk.
# NOTE(review): saved under "tmp_files/..." but loaded from
# "../tmp_files/..."; confirm both resolve to the same location.
pipeline.save("tmp_files/pipeline_rf_titanic")
del pipeline
pipeline = load_pipeline("../tmp_files/pipeline_rf_titanic")

# Prints True if any prediction differs between the original and the
# reloaded pipeline.
features = pipeline.convert(data).drop("Survived", axis=1)
pred2 = pipeline.predict(features)
print((pred1 != pred2).any())
def test_sum_var2_group_by(self):
    """Grouping by "Cat" and summing "Var2" gives the expected totals."""
    expected_sums = (240, 272, 177)
    for row, expected in enumerate(expected_sums):
        # A fresh pipeline is built and applied for every checked row.
        result = Pipeline(GroupBy("Cat"), Sum("Var2")).apply(self.df)
        self.assertEqual(result["Var2_Sum"][row], expected)
"TASK": "classification", "TIME": "4m", "PREDICTED_COLUMN_NAME": "A16", "EVOLUTIONARY_MODEL_CONFIG": { "GENERAL_CRITERION": "BCE", "POPULATION_SIZE": 4, "NEURAL_NETWORK_EVOL_CONFIG": { "LEARNING_RATE_RANGE": [0.00001, 0.0005], } } } } pip = Pipeline(config=config) df = read_csv("../Datasets/credit.csv") mask = df.A16 == "+" column_name = 'A16' df.loc[mask, column_name] = 1 mask = df.A16 == "-" column_name = 'A16' df.loc[mask, column_name] = 0 df.to_csv("../Datasets/credit.csv") df = read_csv("../Datasets/credit.csv")
from Pipeline import Pipeline
from visualization import visualize

# The analysis only runs when this file is executed as a script; when it is
# imported, only the apology is printed.
if __name__ != "__main__":
    print("Sorry :(")
else:
    print("> Lets go!")
    pipeline_structure = Pipeline("test_videos/two_people_two_directions.mp4")
    pipeline_structure.cut_into_frames()
    pipeline_structure.ssd_facedetection()
    pipeline_structure.get_directions()
    pipeline_structure.write_to_yaml()

    # Alternative detectors and debug views, currently disabled:
    # pipeline_structure.show_frames()
    # pipeline_structure.simple_opencv_facedetection(
    #     "/usr/local/lib/python3.9/site-packages/cv2/data/haarcascade_frontalface_default.xml")
    # pipeline_structure.mtcnn_facedetection()
    # pipeline_structure.show_faces(30)
    # pipeline_structure.show_gaze_detection(250)

    print("> Finished analyzing!")
    visualize()
# this file contains a demo using the container_problem dataset
from Pipeline import Pipeline, Splitter
from pandas import read_csv

data = read_csv("../Datasets/container_problem.csv")

pipeline = Pipeline()
model = pipeline.fit(data)

# X must be defined before it is converted below, and Splitter is imported
# for nothing else, so the split has to be live code.
# Presumably X holds the features and Y the "runtime" column -- confirm
# against Splitter.XYsplit.
X, Y = Splitter.XYsplit(data, "runtime")
X_conv = pipeline.convert(X)
pred = model.predict(X_conv)

# NOTE(review): input is read from "../Datasets/" but the predictions are
# written under "Datasets/"; confirm the intended output directory.
pred.to_csv("Datasets/container_predicted", index=False)
def list(cdap_instance):
    '''
    List all available Namespaces and Pipelines
    '''
    # Announce the action, then connect to the given instance and let the
    # Pipeline client do the listing.
    click.echo('List Pipelines')
    client = Pipeline(cdap_instance)
    client.connect()
    client.list()
# Action recognition - models available : # ehpi # Face detection - models available : # MTCNN human_detection = True pose_estimation = True face_detection = True facial_landmarks_estimation = True facial_emotion_recognition = True action_recognition = False pipeline = Pipeline(regime='detection') if human_detection: human_detector = HumanDetector(model_name='yolo3_mobilenet1.0_coco', threshold=0.25, input_size=(512, 1024, 3), do_timing=True) multi_human_tracker = MultiObjectTracker(model_name='CSRT', do_timing=True) if pose_estimation: human_pose_estimator = HumanPoseEstimator( model_name='simple_pose_resnet18_v1b', threshold=0.20, do_timing=True) if face_detection: face_detector = FaceDetector(model_name='MTCNN', threshold=0.25, input_size=(256, 512, 3),
def main():
    """Replay a recorded getevent trace on a device.

    The trace path is taken from the first command-line argument.

    Returns:
        1 when no trace path was given (after printing usage); otherwise
        None once the pipeline has executed.
    """
    if len(sys.argv) < 2:
        print("Usage: monkeyrunner DroidReplayer.py TRACE_PATH")
        print("The trace must be generated from getevent -lt [EVDEV]")
        return 1

    print("Replay started")
    trace_path = sys.argv[1]

    replay = Pipeline()
    replay.addStep(dtm.TextFileLineReader(trace_path))
    replay.addStep(dtm.RawTraceParser())
    replay.addStep(dtm.MultiTouchTypeAParser())
    replay.addStep(dtm.RelativeTimingConverter())

    device = EMonkeyDevice()
    replay.addStep(dtm.DeviceAdjuster(device))
    replay.addStep(dtm.FingerDecomposer())
    replay.addStep(GestureReplayEventWrapper())

    # Optional steps, currently disabled:
    # this step might be necessary for a tablet
    # replay.addStep(dtm.TrailScaler(0.8,0.8))
    # replay.addStep(dtm.TimeScaler(0.25))
    # trouble maker
    # replay.addStep(TroubleInjector())
    # replayers = [MonkeyHelperReplayer(device), TroubleReplayer(device)]
    # replay.addStep(CompositeReplayer(replayers))

    replay.addStep(MonkeyHelperReplayer(device))
    replay.addStep(dtm.GenericPrinter())
    replay.execute()
    print("Replay finished")
big_bang_time = cur_time() if useSwap or useEstDiff: print( 'Computing ASL rate and swap method (bootstrapped approach)' ) #computes: ASL rate, swap method (with replacement), bootstrapped approach. begin_time = cur_time() rand.seed(seed_) for iter_ in range(iterations): print('Computing iteration %d of %d, time=%f.' % (iter_ + 1, iterations, (cur_time() - begin_time) / 60)) data = getExperimentsFromParameterRanges( methodNames_parameters, list_of_splits, list_of_systemCounts, list_of_entityCounts, list_of_signValues, list_of_cutoffs, system_id_rank, totalRanks, bootstrapSize) pipe = Pipeline([computeSwapRate], nr_threads=nr_threads, update_interval=100, verbose=verbose) pipe.execute( data, f_result_handler=experiment_results.handleExperimentResult, chunksize=10, pool=pool) print('Done computing ASL rate and swap method. Time: %f\n' % ((cur_time() - begin_time) / 60)) if useErrTie: print( 'Computing Error and Tie rate' ) #computes: error and tie rate, relRate, signRate (without replacement) begin_time = cur_time() rand.seed(seed_)
def main():
    """Run the PhyloTOL pipeline: parameter report, taxon step, gene step.

    An optional single command-line argument selects the behaviour:
    ``ng`` / ``nr`` are kept as the run mode, ``cl`` requests a cleaning
    run, ``c1`` / ``c2`` / ``c3`` set the continuation tag ``ct``. Any
    argument other than ``ng`` / ``nr`` leaves the mode at ``df``.

    The function prints the parameters obtained from ``get_parameters()``,
    resolves the taxa of interest against the taxa database, creates the
    output folders and log entries, writes the average sequence length of
    every OG, runs the taxon step for each file in ``ncbiFiles`` and then
    the gene step for each OG list file. Errors in either step are appended
    to ``errorlog`` and printed.

    Returns:
        True when the end of the function is reached. Several validation
        failures, and the cleaning mode, end the process through ``quit()``.
    """
    arg = sys.argv
    ct = 'c0'
    clean = "n"

    if len(arg) == 2:
        mode = sys.argv[1]
        allowed_modes = ['ng', 'nr']
        if str(mode) == 'cl':
            clean = 'y'
        if mode in ["c1", "c2", "c3"]:
            ct = mode
        if mode not in allowed_modes:
            mode = 'df'
    else:
        mode = 'df'

    (PathtoFiles, testPipelineList, listTaxaInterest, blastCutOff,
     seqLenCompCutOff, tooSimCutOff, guidanceIter, seqcutoff, colcutoff,
     rescutoff, concatAlignment, majorClades) = get_parameters()
    paramList = [
        blastCutOff, seqLenCompCutOff, tooSimCutOff, guidanceIter, seqcutoff,
        colcutoff, rescutoff, concatAlignment
    ]

    print("\n** mode -> %s **" % mode)
    print(
        '################################################################################'
    )
    print('')
    print('KATZLAB PHYLOGENOMICS PIPELINE')
    print('')
    print(
        'This script assumes your data files and scripts are in the folders they came in. '
    )
    print(
        'It also assumes there is a list of OGs you are interested in is in the Files folder.'
    )
    print('')
    print('')
    print('PARAMETERS:')
    print('name of OG list = %s' % testPipelineList)
    print('name of list of taxa of interest = %s' % listTaxaInterest)
    print('Blast cutoff = %s' % blastCutOff)
    print('Sequence length cutoff = %s' % seqLenCompCutOff)
    print('Cluster cutoff (too similar) = %s' % tooSimCutOff)
    print('Number of Guidance iterations = %s' % guidanceIter)
    print('Guidance sequence cutoff = %s' % seqcutoff)
    print('Guidance colum cutoff = %s' % colcutoff)
    print('Guidance residue cutoff = %s' % rescutoff)
    print('Alignment for concatenation = %s' % concatAlignment)
    print('Major clades = %s' % majorClades)
    print(
        '################################################################################'
    )

    # Strings are compared by value. The original used `is not`, which tests
    # object identity and only worked when the interpreter happened to reuse
    # the same string object.
    if concatAlignment not in ('y', 'n'):
        print("\n*** your answer concatAlignment = " + concatAlignment +
              " is not correct. The pipeline takes 'n' as default ***")

    # NOTE(review): only the existence check is taken to depend on ct;
    # confirm against the original layout.
    if ct == 'c0':
        if os.path.exists('../' + testPipelineList + '_results2keep'):
            print(
                'terminating PhyloTOL: the folder ' + '../' + testPipelineList +
                '_results2keep exists. Choose another name for your OG list\n\n'
            )
            quit()

    # list of ogs of interest
    with open(PathtoFiles + '/' + testPipelineList, 'r') as og_list_file:
        infile = og_list_file.readlines()
    if not infile:
        print('terminating PhyloTOL: Your list of OGs is empty\n\n')
        quit()

    # MACR - Incorporated in v3, updated for v4
    # The file taxaDBpipeline4 (previously taxaDBpipeline3) contains all taxa
    # in the databases "seed dataset - allOG5Files" and "added taxa -
    # ncbiFiles and BlastFiles". This is important for all the procedures
    # incorporated in V3 (e.g., similarity filter, overlap filter).
    with open(PathtoFiles + 'taxaDBpipeline4', 'r') as taxaDBfile:
        taxaDB = [taxon.strip('\n') for taxon in taxaDBfile]

    if not os.path.exists(PathtoFiles + listTaxaInterest):
        print("you need to have a list of taxa of interest")
        quit()
    with open('%s%s' % (PathtoFiles, listTaxaInterest), 'r') as toi_file:
        listTOI = toi_file.readlines()

    taxa2SF = []
    if listTOI[0] == "all\n":
        taxa2analyze = 'all'
        print("you chose to run your analysis with all taxa\n\n")
    else:
        taxaInterest = []
        sf = ''
        # MACR - lines before a '#' line name the taxa to be analysed (those
        # matching the database are taken); lines after it name the taxa to
        # apply the similarity filter (SF) to.
        for taxon in listTOI:
            taxon = taxon.strip('\n')
            if '#' in taxon:
                sf = 'y'
            elif sf != 'y':
                # Lines starting with '-' are exclusions, handled in the
                # second pass below.
                if not taxon.startswith('-'):
                    for taxonINdb in taxaDB:
                        if taxon in taxonINdb:
                            taxaInterest.append(taxonINdb)
            else:
                for taxonINdb in taxaDB:
                    if taxon in taxonINdb:
                        taxa2SF.append(taxonINdb.split(',')[2])

        # Second pass: drop every selected taxon matched by a '-' line.
        taxaInterest2 = list(taxaInterest)
        sf = ''
        for taxon in listTOI:
            if '#' in taxon:
                sf = 'y'
            elif taxon.startswith('-'):
                taxon = taxon[1:].strip('\n')
                for taxonInterest in taxaInterest2:
                    if taxon in taxonInterest:
                        taxaInterest.remove(taxonInterest)

        taxaInterest = list(set(taxaInterest))
        # The third comma-separated field of a database entry is the name
        # used for the analysis.
        taxa2analyze = [taxon.split(',')[2] for taxon in taxaInterest]

        if len(taxa2analyze) == 0:
            print(
                "none of your taxa of interest are in the pipeline database\n\n"
            )
            quit()
        else:
            print("%s taxa will be analized\n\n%s\n\n" %
                  (len(taxa2analyze), taxa2analyze))
            if taxa2SF:
                print(
                    "Similarity filter will be applied to these taxa:\n\n%s\n\n"
                    % taxa2SF)

    # MACR - Creating files, folders and writing logfiles.
    PathtoOutput = '../my-data/' + testPipelineList + '_results/Output/'
    os.system('mkdir ../my-data/')
    os.system('mkdir ../my-data/' + testPipelineList + '_results')
    os.system('mkdir ../my-data/' + PathtoOutput)

    writelog(PathtoOutput, 'mode = ' + mode)
    writelog(PathtoOutput, 'testPipelineList = ' + testPipelineList)
    writelog(PathtoOutput, 'blastCutOff = ' + str(blastCutOff))
    writelog(PathtoOutput, 'seqLenCompCutOff = ' + str(seqLenCompCutOff))
    writelog(PathtoOutput, 'tooSimCutOff = ' + str(tooSimCutOff))
    writelog(PathtoOutput, 'guidanceIter = ' + str(guidanceIter))
    writelog(PathtoOutput, 'seqcutoff = ' + str(seqcutoff))
    writelog(PathtoOutput, 'colcutoff = ' + str(colcutoff))
    writelog(PathtoOutput, 'rescutoff = ' + str(rescutoff))
    writelog(
        PathtoOutput, 'concatAlignment = ' + concatAlignment +
        ' (y = remove paralogs and make alignment, n = keep paralogs and do not make alignment)'
    )
    writelog(PathtoOutput, 'majorClades = ' + str(majorClades))

    # MACR -- V4 -- cleaning intermediary files and logs (for instance, after
    # incomplete run or forced stoppage) using phylotol
    if clean == "y":
        Utilities.cleaner(testPipelineList, PathtoFiles, PathtoOutput)
        print("cleaning folders -- done!")
        quit()

    # MACR 03/04/19 -- og average length for OF and SF
    with open(PathtoFiles + "/" + testPipelineList, "r") as og_list_file:
        ogs = og_list_file.readlines()
    with open(PathtoOutput + "oglengths", "a") as oglengths:
        for og in ogs:
            og = og.strip()
            seq_len = {}
            with open(PathtoFiles + "/allOG5Files/" + og, "r") as ogFile:
                for line in ogFile:
                    line = line.strip()
                    if line.startswith(">"):
                        tag = line
                        seq_len[tag] = 0
                    else:
                        seq_len[tag] += len(line)
            og_totalLength = sum(seq_len.values())
            averageLength = og_totalLength / len(seq_len)
            oglengths.write("%s\t%s\n" % (og, averageLength))

    # MACR - Taxon step with changes for Pipeline 3
    # This part calls the Taxon class to generate the folder fasta2keep,
    # which contains sequences from non-orthomcl taxa categorized as OGs.
    # Only the non-orthomcl taxa included in the user's list of taxa of
    # interest are processed; when no list restricts the run
    # (taxa2analyze == 'all') every Blast report is processed.
    for f in os.listdir(PathtoFiles + '/ncbiFiles'):
        # The first ten characters of the file name identify its taxon.
        if taxa2analyze != 'all' and f[:10] not in taxa2analyze:
            continue
        print('\n' + f + '\n')
        if f[0] == '.':
            continue
        try:
            newPipe = Pipeline(PathtoFiles + testPipelineList, PathtoFiles,
                               ('queueTaxa', f), paramList, taxa2analyze,
                               taxa2SF, majorClades, mode)
        except Exception as e:
            # Best effort: record the failure and carry on with the next
            # file.
            with open('errorlog', 'a') as elog:
                elog.write(f + " failed on %s with: %s" % (f, e))
            print("failed on %s with: %s" % (f, e))

    # JG - Pipeline 3: run Gene step - just up to guidance, through the
    # pipeline methods 'test_keep' and 'test_remove' (keep / remove ingroup
    # paralogs).
    # MACR - Pipeline 3: besides the changes described by Jessica above
    # there are 3 major changes:
    #   - A new Guidance method: a bash script made by Miguel Fonseca.
    #   - A "helper" for the new Guidance method: a perl script that edits
    #     the intermediary files of each guidance run to allow the looping.
    #   - A new module called Utilities, which takes the preguidance files
    #     and runs Miguel's scripts. Once Guidance and raxml are done, some
    #     of its functions continue to produce the alignments for
    #     concatenation. Finally it organizes all output files.
    # MACR - Pipeline 3.1: Needle step and ingroup paralogs removal were
    # replaced by an overlap filter (OF) and a similarity filter (SF), both
    # performed using Usearch-Ublast. This step was removed from the taxon
    # class and added as a separate script called 'iterUblast.py', called
    # from the module 'Utilities'. The logic now is: all sequences per taxa
    # should be 1.5 times smaller than the average OG length and pass the
    # overlap filter. Then the user specifies whether to run a similarity
    # filter for the sequences.
    os.system('mkdir ' + PathtoFiles + '/FileLists_' + testPipelineList)

    # One list file per OG line, numbered consecutively.
    count = 0
    li = []
    for line in infile:
        if 'OG' in line:
            list_path = (PathtoFiles + '/FileLists_' + testPipelineList +
                         '/list' + str(count))
            with open(list_path, 'w') as outfile:
                li.append('list' + str(count))
                outfile.write(line)
            count = count + 1

    # MAC - for pipeline 3.1 all methods for gene step were replaced by this
    # one
    try:
        for f in os.listdir(PathtoFiles + '/FileLists_' + testPipelineList):
            newPipe = Pipeline(PathtoFiles + '/' + testPipelineList,
                               PathtoFiles, ('geneStep', f), paramList,
                               taxa2analyze, taxa2SF, majorClades, mode)
        ### By inactivating the next line you can have access to all
        ### intermediary files
        # NOTE(review): the cleaner is taken to run once, after every gene
        # step; confirm it was not meant to run per list file.
        Utilities.cleaner(testPipelineList, PathtoFiles, PathtoOutput)
    except Exception as e:
        with open('errorlog', 'a') as elog:
            failed_list_path = (PathtoFiles + '/FileLists_' +
                                testPipelineList + '/' + f)
            with open(failed_list_path, 'r') as failed_list:
                line = failed_list.read()
            elog.write(line + " failed on %s with: %s" % (f, e))
        print("failed on %s with: %s" % (f, e))

    return True
}, "TRAINING": True, "TRAINING_CONFIG": { "TYPE": "evolutionary", "TASK": "", "TIME": "10s", "PREDICTED_COLUMN_NAME": "Survived" } } def print_stats(d): print("Stats {}".format(d)) pipeline = Pipeline(config=config) # fit the data to the pipeline model = pipeline.fit( data, verbose=False, training_callbacks=[ # EvolutionaryFeedback(print_stats), # PipelineFeedback(print_stats), # ModelTriedCallback(print_stats), ]) # summary = model.summary() # with open('summary.json', 'w') as outfile: # json.dump(summary, outfile) # # save the model for further reusage # print(summary.get("BEST_MODEL"))
li = [] for line in infile: if 'OG5_' in line: outfile = open( PathtoFiles + '/FileLists_' + testPipelineList + '/list' + str(count), 'w') li.append('list' + str(count)) outfile.write(line) outfile.close() count = count + 1 # MAC - for pipeline 3.1 all methods for gene step were replaced by this one try: for f in os.listdir(PathtoFiles + '/FileLists_' + testPipelineList): newPipe = Pipeline(PathtoFiles + '/' + testPipelineList, PathtoFiles, ('geneStep', f), paramList, taxa2analyze, taxa2SF, wholegenomeDB, mode) answer_Cleaner = '' valid_answers = ['y', 'n'] if ct == 'y': answer_Cleaner = 'y' else: while (answer_Cleaner not in valid_answers): answer_Cleaner = raw_input( "\n\nDo you want to execute the cleaner? (y/n): ") if (answer_Cleaner not in valid_answers): print "\n\nplease answer y or n"
# Service endpoints and identity used by the document pipeline.
insert_document_service_uri = 'http://localhost:3020/api/insertFromPipe'
send_task_service_uri = 'http://localhost:3020/api/sendPipeTask'
logfilename = 'PipelineLog.txt'
ORGANIZATION = 'uic'
GROUPNAME = 'uic'

# Settings handed to the Pipeline constructor; the tool paths and the
# figsplit URL are defined earlier in the file.
config = dict(
    chromedriver_path=chromedriver_path,
    xpdf_pdftohtml_path=xpdf_pdftohtml_path,
    imagemagick_convert_path=imagemagick_convert_path,
    figsplit_url=figsplit_url,
    insert_document_service_uri=insert_document_service_uri,
    send_task_service_uri=send_task_service_uri,
    organization=ORGANIZATION,
    groupname=GROUPNAME,
    logfilename=logfilename,
)

# Input and output folders sit two levels above the current folder.
input_folder = abspath(
    join(current_folder, '..', '..', 'input', 'pipeline_input'))
output_folder = abspath(
    join(current_folder, '..', '..', 'output', 'pipeline_output'))
#input_document_path = join(input_folder, '15350224.pdf')

# Process every entry of the input folder with one shared Pipeline.
p = Pipeline(config)
input_documents = listdir(input_folder)
for input_doc in input_documents:
    input_document_path = join(input_folder, input_doc)
    result = p.process_file(input_document_path, output_folder)
def test_sum_var1_group_by(self):
    """Grouping by "Cat" and summing "Var1" gives the expected totals."""
    expected_sums = (1147, 1489, 909)
    for row, expected in enumerate(expected_sums):
        # A fresh pipeline is built and applied for every checked row.
        result = Pipeline(GroupBy("Cat"), Sum("Var1")).apply(self.df)
        self.assertEqual(result["Var1_Sum"][row], expected)
import signal
import time

from Pipeline import Pipeline
from components.all import MatrixSource, AuxSink

#TODO config file to define pipeline structure along with rate, frames per buffer
if __name__ == "__main__":
    rate = 48000
    fpb = 128
    # Source and sink are built with the same rate and buffer size.
    stream_opts = {"rate": rate, "frames_per_buffer": fpb}

    # create the audio pipeline, just echos input from MATRIX Voice for now
    audio_pipeline = Pipeline()
    audio_pipeline.add(MatrixSource(**stream_opts))
    audio_pipeline.add(AuxSink(**stream_opts))

    # start the audio pipeline
    audio_pipeline.start()
    print("Pipeline started...")

    # set up interrupt handling
    def interrupt_handler(signum, sigframe):
        """Stop the pipeline and exit with status 0 on SIGINT."""
        audio_pipeline.stop()
        print("Pipeline stopped!")
        exit(0)

    signal.signal(signal.SIGINT, interrupt_handler)
def export_all(cdap_instance):
    '''
    Export All Pipelines
    '''
    # Announce the action in bold green, then connect to the given instance
    # and let the Pipeline client perform the export.
    banner = click.style('Exporting All Pipeline(s)', fg='green', bold=True)
    click.echo(banner)
    client = Pipeline(cdap_instance)
    client.connect()
    client.export()