Example #1
def get_locations():
	pipeline_obj = Pipeline()
	locations_list = pipeline_obj.get_locations()
	json_dict = {}
	json_dict['data'] = locations_list
	print(jsonify(json_dict))
	return jsonify(json_dict)
Example #2
def get_cluster(location_id):
	pipeline_obj = Pipeline()
	nodes_list,links_list = pipeline_obj.get_cluster(location_id)
	json_dict = {}
	json_dict['nodes'] = nodes_list 
	json_dict['links'] = links_list
	return jsonify(json_dict)
Example #3
def get_dates(location_id):
	pipeline_obj = Pipeline()
	dates_list = pipeline_obj.get_dates_location(location_id)
	json_dict = {}
	json_dict['data'] = dates_list
	print(json_dict)
	return jsonify(json_dict)
Example #4
def get_tweets():
	trend = request.args.get('trend')
	entity = request.args.get('entity')
	pipeline_obj = Pipeline()
	tweets_list = pipeline_obj.get_tweets(trend,entity)
	json_dict = {}
	json_dict['data'] = tweets_list 
	return jsonify(json_dict)
Example #5
def get_tfidf():
	location_id = request.args.get('locationid')
	trend = request.args.get('trend')
	pipeline_obj = Pipeline()
	tfidf_list = pipeline_obj.get_tfidf(location_id,trend)
	json_dict = {}
	json_dict['data'] = tfidf_list 
	return jsonify(json_dict)
Example #6
def get_trends():
	location_id = request.args.get('locationid')
	min_date = request.args.get('min_date')
	max_date = request.args.get('max_date')
	pipeline_obj = Pipeline()
	trends_list = pipeline_obj.get_trends(location_id,min_date,max_date)
	json_dict = {}
	json_dict['data'] = trends_list
	return jsonify(json_dict)
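The six handlers above read like Flask view functions whose route decorators were stripped in the excerpt. Below is a minimal, self-contained sketch of how a handler shaped like Example #6 is typically wired up; the route path, app setup and the stub pipeline class are assumptions for illustration, not taken from the original projects.
from flask import Flask, jsonify, request


class StubPipeline(object):
    """Stand-in for the projects' Pipeline class, for illustration only."""

    def get_trends(self, location_id, min_date, max_date):
        return [{'location': location_id, 'from': min_date, 'to': max_date}]


app = Flask(__name__)


@app.route('/trends')
def trends():
    # same shape as Example #6: read query parameters, ask the pipeline,
    # wrap the result in a {'data': ...} envelope and return it as JSON
    location_id = request.args.get('locationid')
    min_date = request.args.get('min_date')
    max_date = request.args.get('max_date')
    trends_list = StubPipeline().get_trends(location_id, min_date, max_date)
    return jsonify({'data': trends_list})


if __name__ == '__main__':
    app.run(debug=True)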
Example #7
def main():
    print("Replay started")
    pl = Pipeline()
    pl.addStep(EventGenerator(interval=3000, number=1000))
    pl.addStep(TroubleInjector())
    dev = dm.getDevice()
    pl.addStep(TroubleReplayer(dev))
    pl.execute()
    print("Replay finished")
Example #8
def buildPipelines(pipes, config):
    """
    Generate pipelines
    """
    pipesDict = {}
    for pipe in pipes:
        print(config)
        try:
            parms = config[pipe]
        except KeyError:
            parms = {}

        print(parms)
        x = Pipeline(*parms)
        x.create()
        pipesDict[pipe] = x
    return pipesDict
Example #9
def pipeline_from_config_file(config_file):
    """
    Create a Pipeline instance from a ConfigObj/INI configuration file 
    `config_file` which specifies the Pipeline steps, data directories
    etc.
    """
    # Do we have a spec file? If so, do parameter and input/output key 
    # validation as well. If not keep going.
    spec_file = utilities.find_spec_file(Pipeline)
    
    # Now do the actual parsing and, if we do have a spec file, validate as 
    # well.
    parsed = config_parser.loads(config_file, 
                                 specfile=spec_file)['pipeline']
    
    # Create a Pipeline instance with no steps, we will add them later.
    pipe = Pipeline(name=parsed['name'],
                    system=parsed['system'],
                    log_level=parsed.get('log_level', DEFAULT_LOG_LEVEL),
                    local_logs=parsed.get('local_log_mode', DEFAULT_LOCAL_LOGS))
    
    # The only thing that requires special handling is the steps array. 
    # Here we have to create Step instances of the appropriate class and
    # pass the appropriate Step config file to them.
    # Also, as part of the "steps" list, we have hints on which data each 
    # Step produces and which data it consumes. In order to transfer these
    # pieces of data in-memory between steps we have a simple architecture.
    # We have a dictionary at the Pipeline level where data is put and
    # possibly updated. This is the clipboard. Then before executing each 
    # Step, the data the Step needs in input is put in Step.inbox which
    # is a list. Elements are put in that list in the order they are defined
    # in that Step section of the Pipeline configuration file (inbox 
    # parameter). After the Step completes, data from Step.outbox is 
    # fetched and put in the clipboard. Data in Step.outbox is assumed to 
    # be in the order defined in that Step section of the Pipeline 
    # configuration file (outbox parameter).
    steps = [Step.from_parsed_config(x, pipe) for x in parsed['steps']]
    
    # Finally update the pipe.steps list. We did this so that the Step 
    # instances could make use in their initialization, of whatever they
    # needed to pull from the Pipeline object they belong to.
    pipe.configure(steps)
    return pipe
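The comments in Example #9 describe a clipboard/inbox/outbox hand-off between steps. The following self-contained sketch illustrates that idea with hypothetical MiniStep/MiniPipeline classes; it is not the project's actual API.
# Illustrative sketch only: a minimal model of the clipboard/inbox/outbox
# data transfer described in Example #9 (hypothetical class names).
class MiniStep(object):
    def __init__(self, name, inbox_keys, outbox_keys, fn):
        self.name = name
        self.inbox_keys = inbox_keys    # clipboard keys consumed, in order
        self.outbox_keys = outbox_keys  # clipboard keys produced, in order
        self.fn = fn
        self.inbox = []
        self.outbox = []

    def run(self):
        # fn maps the ordered inbox values to the ordered outbox values
        self.outbox = list(self.fn(*self.inbox))


class MiniPipeline(object):
    def __init__(self, steps):
        self.steps = steps
        self.clipboard = {}

    def execute(self):
        for step in self.steps:
            # before the step runs, fill its inbox from the clipboard
            step.inbox = [self.clipboard[k] for k in step.inbox_keys]
            step.run()
            # after the step completes, copy its outbox back to the clipboard
            self.clipboard.update(zip(step.outbox_keys, step.outbox))


# toy usage: one step produces a value, the next consumes it via the clipboard
produce = MiniStep('produce', [], ['raw'], lambda: (list(range(5)),))
double = MiniStep('double', ['raw'], ['doubled'], lambda raw: ([x * 2 for x in raw],))
mini = MiniPipeline([produce, double])
mini.execute()
print(mini.clipboard['doubled'])  # [0, 2, 4, 6, 8]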
Example #10
def build(config):
    with open(config, 'r') as stream:
        config = yaml.safe_load(stream)
    pipes = config.get('pipes')
    pipeConfig = config.get('pipeConfig')
    pipesList = []

    for pipe in pipes:
        try:
            parms = pipeConfig[pipe]
        except KeyError:
            parms = {}
        x = Pipeline(*parms)
        x.create()
        pipesList.append(x)

    print(pipesList)
    return pipesList
Example #11
def main():
    global optimizer, lr_reducer, views, epoch, pipeline
    # Read configuration file
    parser = argparse.ArgumentParser()
    parser.add_argument("experiment_name")
    arguments = parser.parse_args()

    cfg_file_path = os.path.join("./experiments", arguments.experiment_name)
    args = configparser.ConfigParser()
    args.read(cfg_file_path)

    # Prepare rotation matrices for multi view loss function
    eulerViews = json.loads(args.get('Rendering', 'VIEWS'))
    views = prepareViews(eulerViews)

    # Set the cuda device
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)

    # Handle loading of multiple object paths
    try:
        model_path_loss = json.loads(args.get('Dataset', 'MODEL_PATH_LOSS'))
    except:
        model_path_loss = [args.get('Dataset', 'MODEL_PATH_LOSS')]

    # Set up batch renderer
    br = BatchRender(model_path_loss,
                     device,
                     batch_size=args.getint('Training', 'BATCH_SIZE'),
                     faces_per_pixel=args.getint('Rendering',
                                                 'FACES_PER_PIXEL'),
                     render_method=args.get('Rendering', 'SHADER'),
                     image_size=args.getint('Rendering', 'IMAGE_SIZE'),
                     norm_verts=args.getboolean('Rendering',
                                                'NORMALIZE_VERTICES'))

    # Set size of model output depending on pose representation - deprecated?
    pose_rep = args.get('Training', 'POSE_REPRESENTATION')
    if (pose_rep == '6d-pose'):
        pose_dim = 6
    elif (pose_rep == 'quat'):
        pose_dim = 4
    elif (pose_rep == 'axis-angle'):
        pose_dim = 4
    elif (pose_rep == 'euler'):
        pose_dim = 3
    else:
        print("Unknown pose representation specified: ", pose_rep)
        pose_dim = -1

    # Initialize a model using the renderer, mesh and reference image
    model = Model(num_views=len(views))
    model.to(device)

    # Create an optimizer. Here we are using Adam and we pass in the parameters of the model
    low_lr = args.getfloat('Training', 'LEARNING_RATE_LOW')
    high_lr = args.getfloat('Training', 'LEARNING_RATE_HIGH')
    optimizer = torch.optim.Adam(model.parameters(), lr=low_lr)
    lr_reducer = ExponentialLR(optimizer, high_lr,
                               args.getfloat('Training', 'NUM_ITER'))

    # Prepare output directories
    output_path = args.get('Training', 'OUTPUT_PATH')
    prepareDir(output_path)
    shutil.copy(cfg_file_path,
                os.path.join(output_path,
                             cfg_file_path.split("/")[-1]))

    # Prepare pipeline
    encoder = Encoder(args.get('Dataset', 'ENCODER_WEIGHTS')).to(device)
    encoder.eval()
    pipeline = Pipeline(encoder, model, device)

    # Handle loading of multiple object paths and translations
    try:
        model_path_data = json.loads(args.get('Dataset', 'MODEL_PATH_DATA'))
        translations = np.array(json.loads(args.get('Rendering', 'T')))
    except:
        model_path_data = [args.get('Dataset', 'MODEL_PATH_DATA')]
        translations = [np.array(json.loads(args.get('Rendering', 'T')))]

    # Prepare datasets
    bg_path = "../../autoencoder_ws/data/VOC2012/JPEGImages/"
    training_data = DatasetGenerator(args.get('Dataset', 'BACKGROUND_IMAGES'),
                                     model_path_data, translations,
                                     args.getint('Training', 'BATCH_SIZE'),
                                     "not_used", device,
                                     args.get('Training', 'VIEW_SAMPLING'))
    training_data.max_samples = args.getint('Training', 'NUM_SAMPLES')

    # Start training
    np.random.seed(seed=args.getint('Training', 'RANDOM_SEED'))
    while (epoch < args.getint('Training', 'NUM_ITER')):
        # Train on synthetic data
        model = model.train()  # Set model to train mode
        loss = runEpoch(br,
                        training_data,
                        model,
                        device,
                        output_path,
                        t=translations,
                        config=args)
        append2file([loss], os.path.join(output_path, "train-loss.csv"))
        append2file([lr_reducer.get_lr()],
                    os.path.join(output_path, "learning-rate.csv"))

        # Plot losses
        val_losses = plotLoss(
            os.path.join(output_path, "train-loss.csv"),
            os.path.join(output_path, "train-loss.png"),
            validation_csv=os.path.join(output_path, "train-loss.csv"),
        )
        print("-" * 20)
        print("Epoch: {0} - train loss: {1}".format(epoch, loss))
        print("-" * 20)
        epoch = epoch + 1
Example #12
from Pipeline import Pipeline 

pipeline_obj = Pipeline()
pipeline_obj.update_organized_tweets()
# nodes_array,links_array,cluster_id_key = pipeline_obj.get_cluster('2295420')
# print nodes_array,links_array,cluster_id_key
Example #13
def main():
    # capture three samples and automatically scrolls five times
    SAMPLE_COUNT = 3
    REPEAT_COUNT = 5
    print "learning the pace of scrolling"
    pl = Pipeline()
    reader = LiveGeteventReader()
    pl.addStep(reader)
    pl.addStep(dtm.RawTraceParser())
    pl.addStep(dtm.MultiTouchTypeAParser())
    pl.addStep(dtm.RelativeTimingConverter())
    pl.addStep(dtm.FingerDecomposer())
    learner = AutoScrollingLearner(reader, SAMPLE_COUNT)
    pl.addStep(learner)
    pl.execute()
    (waitTime, xdelta, ydelta, duration, pointCount) = learner.getSpeedAndDelta()
    print "User scrolling parameters learned"
    device = EMonkeyDevice()
    (xmiddle, ymiddle) = (device.displayWidth / 2, device.displayHeight / 2)
    start = (xmiddle - xdelta / 2, ymiddle - ydelta / 2)
    end = (xmiddle + xdelta / 2, ymiddle + ydelta / 2)
    for _ in range(REPEAT_COUNT):
        device.sleep(waitTime)
        device.drag(start, end, duration, pointCount)
    print('done')
Example #14
# this file tests the random forest model
from pandas import read_csv
from Pipeline import Pipeline, load_pipeline

data = read_csv("../Datasets/titanic.csv")

pipeline = Pipeline()

pipeline.fit(data)

pred1 = pipeline.predict(pipeline.convert(data).drop("Survived", axis=1))
pipeline.save("../tmp_files/pipeline_rf_titanic")

del pipeline

pipeline = load_pipeline("../tmp_files/pipeline_rf_titanic")
pred2 = pipeline.predict(pipeline.convert(data).drop("Survived", axis=1))

# expected output: False (predictions should match after the save/load round-trip)
print((pred1 != pred2).any())
Example #15
 def test_sum_var2_group_by(self):
     result = Pipeline(GroupBy("Cat"), Sum("Var2")).apply(self.df)["Var2_Sum"]
     self.assertEqual(result[0], 240)
     self.assertEqual(result[1], 272)
     self.assertEqual(result[2], 177)
Example #16
        "TASK": "classification",
        "TIME": "4m",
        "PREDICTED_COLUMN_NAME": "A16",

        "EVOLUTIONARY_MODEL_CONFIG": {
            "GENERAL_CRITERION": "BCE",
            "POPULATION_SIZE": 4,

            "NEURAL_NETWORK_EVOL_CONFIG": {
                "LEARNING_RATE_RANGE": [0.00001, 0.0005],
            }
        }
    }
}

pip = Pipeline(config=config)


df = read_csv("../Datasets/credit.csv")

mask = df.A16 == "+"
column_name = 'A16'
df.loc[mask, column_name] = 1

mask = df.A16 == "-"
column_name = 'A16'
df.loc[mask, column_name] = 0

df.to_csv("../Datasets/credit.csv")

df = read_csv("../Datasets/credit.csv")
Example #17
from Pipeline import Pipeline
from visualization import visualize

if __name__ == "__main__":
    print("> Lets go!")
    pipeline_structure = Pipeline("test_videos/two_people_two_directions.mp4")
    pipeline_structure.cut_into_frames()
    pipeline_structure.ssd_facedetection()
    pipeline_structure.get_directions()
    pipeline_structure.write_to_yaml()
    # pipeline_structure.show_frames()
    # pipeline_structure.simple_opencv_facedetection(
    #     "/usr/local/lib/python3.9/site-packages/cv2/data/haarcascade_frontalface_default.xml")
    # pipeline_structure.mtcnn_facedetection()
    # pipeline_structure.show_faces(30)
    # pipeline_structure.show_gaze_detection(250)
    print("> Finished analyzing!")
    visualize()
else:
    print("Sorry :(")
Example #18
# this file contains a demo using the container_problem dataset

from Pipeline import Pipeline, Splitter
from pandas import read_csv

data = read_csv("../Datasets/container_problem.csv")

pipeline = Pipeline()

model = pipeline.fit(data)
#
X, Y = Splitter.XYsplit(data, "runtime")
X_conv = pipeline.convert(X)
pred = model.predict(X_conv)
pred.to_csv("Datasets/container_predicted", index=False)
Example #19
def list(cdap_instance):
    ''' List all available Namespaces and Pipelines '''
    click.echo('List Pipelines')
    p = Pipeline(cdap_instance)
    p.connect()
    p.list()
Example #20
# Action recognition - models available:
#   ehpi

# Face detection - models available:
#   MTCNN

human_detection = True
pose_estimation = True

face_detection = True
facial_landmarks_estimation = True
facial_emotion_recognition = True

action_recognition = False

pipeline = Pipeline(regime='detection')

if human_detection:
    human_detector = HumanDetector(model_name='yolo3_mobilenet1.0_coco',
                                   threshold=0.25,
                                   input_size=(512, 1024, 3),
                                   do_timing=True)
    multi_human_tracker = MultiObjectTracker(model_name='CSRT', do_timing=True)
if pose_estimation:
    human_pose_estimator = HumanPoseEstimator(
        model_name='simple_pose_resnet18_v1b', threshold=0.20, do_timing=True)

if face_detection:
    face_detector = FaceDetector(model_name='MTCNN',
                                 threshold=0.25,
                                 input_size=(256, 512, 3),
Example #21
def main():
    if len(sys.argv) <= 1:
        print "Usage: monkeyrunner DroidReplayer.py TRACE_PATH"
        print "The trace must be generated from getevent -lt [EVDEV]"
        return 1
    print "Replay started"
    pl = Pipeline()
    pl.addStep(dtm.TextFileLineReader(sys.argv[1]))
    pl.addStep(dtm.RawTraceParser())
    pl.addStep(dtm.MultiTouchTypeAParser())
    pl.addStep(dtm.RelativeTimingConverter())
    dev = EMonkeyDevice()
    pl.addStep(dtm.DeviceAdjuster(dev))
    pl.addStep(dtm.FingerDecomposer())
    pl.addStep(GestureReplayEventWrapper())
    # this step might be necessary for a tablet
    # pl.addStep(dtm.TrailScaler(0.8,0.8))
    # pl.addStep(dtm.TimeScaler(0.25))
    # trouble maker
    # pl.addStep(TroubleInjector())
    #replayers = [MonkeyHelperReplayer(dev), TroubleReplayer(dev)]
    #pl.addStep(CompositeReplayer(replayers))
    pl.addStep(MonkeyHelperReplayer(dev))
    pl.addStep(dtm.GenericPrinter())
    pl.execute()
    print "Replay finished"
Example #22
    big_bang_time = cur_time()
    if useSwap or useEstDiff:
        print(
            'Computing ASL rate and swap method (bootstrapped approach)'
        )  #computes: ASL rate, swap method (with replacement), bootstrapped approach.
        begin_time = cur_time()
        rand.seed(seed_)
        for iter_ in range(iterations):
            print('Computing iteration %d of %d, time=%f.' %
                  (iter_ + 1, iterations, (cur_time() - begin_time) / 60))
            data = getExperimentsFromParameterRanges(
                methodNames_parameters, list_of_splits, list_of_systemCounts,
                list_of_entityCounts, list_of_signValues, list_of_cutoffs,
                system_id_rank, totalRanks, bootstrapSize)
            pipe = Pipeline([computeSwapRate],
                            nr_threads=nr_threads,
                            update_interval=100,
                            verbose=verbose)
            pipe.execute(
                data,
                f_result_handler=experiment_results.handleExperimentResult,
                chunksize=10,
                pool=pool)
        print('Done computing ASL rate and swap method. Time: %f\n' %
              ((cur_time() - begin_time) / 60))

    if useErrTie:
        print(
            'Computing Error and Tie rate'
        )  #computes: error and tie rate, relRate, signRate (without replacement)
        begin_time = cur_time()
        rand.seed(seed_)
Example #23
def main():
    arg = sys.argv
    ct = 'c0'
    clean = "n"
    if len(arg) == 2:
        mode = sys.argv[1]
        allowed_modes = ['ng', 'nr']
        if str(mode) == 'cl': clean = 'y'
        if mode in ["c1", "c2", "c3"]: ct = mode
        if mode not in allowed_modes: mode = 'df'
    else:
        mode = 'df'

    PathtoFiles, testPipelineList, listTaxaInterest, blastCutOff, seqLenCompCutOff, tooSimCutOff, guidanceIter, seqcutoff, colcutoff, rescutoff, concatAlignment, majorClades = get_parameters(
    )
    paramList = [
        blastCutOff, seqLenCompCutOff, tooSimCutOff, guidanceIter, seqcutoff,
        colcutoff, rescutoff, concatAlignment
    ]

    print("\n** mode -> %s **" % mode)

    print(
        '################################################################################'
    )
    print('')
    print('KATZLAB PHYLOGENOMICS PIPELINE')
    print('')
    print(
        'This script assumes your data files and scripts are in the folders they came in. '
    )
    print(
        'It also assumes the list of OGs you are interested in is in the Files folder.'
    )
    print('')
    print('')
    print('PARAMETERS:')
    print('name of OG list = %s' % testPipelineList)
    print('name of list of taxa of interest = %s' % listTaxaInterest)
    print('Blast cutoff = %s' % blastCutOff)
    print('Sequence length cutoff = %s' % seqLenCompCutOff)
    print('Cluster cutoff (too similar) = %s' % tooSimCutOff)
    print('Number of Guidance iterations = %s' % guidanceIter)
    print('Guidance sequence cutoff = %s' % seqcutoff)
    print('Guidance column cutoff = %s' % colcutoff)
    print('Guidance residue cutoff = %s' % rescutoff)
    print('Alignment for concatenation = %s' % concatAlignment)
    print('Major clades = %s' % majorClades)
    print(
        '################################################################################'
    )

    if concatAlignment != 'y' and concatAlignment != 'n':
        print("\n*** your answer concatAlignment = " + concatAlignment +
              " is not correct. The pipeline takes 'n' as default ***")

    if ct == 'c0':
        if os.path.exists('../' + testPipelineList + '_results2keep'):
            print(
                'terminating PhyloTOL: the folder ' + '../' +
                testPipelineList +
                '_results2keep exists. Choose another name for your OG list\n\n'
            )
            quit()

    infile = open(PathtoFiles + '/' + testPipelineList,
                  'r').readlines()  #list of ogs of interest
    if infile == []:
        print('terminating PhyloTOL: Your list of OGs is empty\n\n')
        quit()
    '''
	MACR - Incorporated in v3, updated for v4

	Since V3 we incorporated the file taxaDBpipeline4 (previously taxaDBpipeline3). This file contains all taxa in the databases "seed dataset - allOG5Files" and
	"added taxa - ncbiFiles and BlastFiles". This is important for all the procedures incorporated in V3 (e.g., similarity filter, overlap filter).
	'''

    taxaDBfile = open(PathtoFiles + 'taxaDBpipeline4', 'r')
    taxaDBfile = taxaDBfile.readlines()
    taxaDB = [taxon.strip('\n') for taxon in taxaDBfile]

    if not os.path.exists(PathtoFiles + listTaxaInterest):
        print("you need to have a list of taxa of interest")
        quit()
    else:
        listTOI = open('%s%s' % (PathtoFiles, listTaxaInterest),
                       'r').readlines()
        taxa2SF = []
        if listTOI[0] == "all\n":
            taxa2analyze = 'all'
            print("you chose to run your analysis with all taxa\n\n")
        else:
            taxaInterest = []
            sf = ''

            ## MACR - from the taxa list specified by the user, take all taxa that match the
            ## database until '#' is found; these are the 'taxa to be analysed'. Then take all
            ## taxa that follow '#' as the 'taxa to apply the similarity filter (SF) to'.
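            ## For illustration only, a hypothetical listTaxaInterest file (the taxon
            ## codes below are made up); lines before '#' are analysed, lines after '#'
            ## get the similarity filter, a leading '-' excludes a taxon, and "all" on
            ## the first line selects every taxon in the database:
            ##     Taxon_code_1
            ##     -Taxon_code_2
            ##     #
            ##     Taxon_code_3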

            for taxon in listTOI:
                taxon = taxon.strip('\n')
                if '#' in taxon:
                    sf = 'y'
                else:
                    if sf != 'y':
                        if not taxon.startswith('-'):
                            for taxonINdb in taxaDB:
                                if taxon in taxonINdb:
                                    taxaInterest.append(taxonINdb)
                    else:
                        for taxonINdb in taxaDB:
                            if taxon in taxonINdb:
                                taxa2SF.append(taxonINdb.split(',')[2])

            taxaInterest2 = list(taxaInterest)
            sf = ''
            for taxon in listTOI:
                if '#' in taxon:
                    sf = 'y'
                else:
                    if taxon.startswith('-'):
                        taxon = taxon[1:].strip('\n')
                        for taxonInterest in taxaInterest2:
                            if taxon in taxonInterest:
                                taxaInterest.remove(taxonInterest)

            taxaInterest = list(set(taxaInterest))

            if taxaInterest:
                taxa2analyze = []
                for taxon in taxaInterest:
                    taxon2analyze = taxon.split(',')[2]
                    taxa2analyze.append(taxon2analyze)
            else:
                taxa2analyze = []

            if len(taxa2analyze) == 0:
                print(
                    "none of your taxa of interest are in the pipeline database\n\n"
                )
                quit()
            else:
                print("%s taxa will be analized\n\n%s\n\n" %
                      (len(taxa2analyze), taxa2analyze))
                if taxa2SF:
                    print(
                        "Similarity filter will be applied to these taxa:\n\n%s\n\n"
                        % taxa2SF)


# MACR - Creating files, folders and writing logfiles.

    PathtoOutput = '../my-data/' + testPipelineList + '_results/Output/'
    os.system('mkdir ../my-data/')
    os.system('mkdir ../my-data/' + testPipelineList + '_results')
    os.system('mkdir ' + PathtoOutput)
    writelog(PathtoOutput, 'mode = ' + mode)
    writelog(PathtoOutput, 'testPipelineList = ' + testPipelineList)
    writelog(PathtoOutput, 'blastCutOff = ' + str(blastCutOff))
    writelog(PathtoOutput, 'seqLenCompCutOff = ' + str(seqLenCompCutOff))
    writelog(PathtoOutput, 'tooSimCutOff = ' + str(tooSimCutOff))
    writelog(PathtoOutput, 'guidanceIter = ' + str(guidanceIter))
    writelog(PathtoOutput, 'seqcutoff = ' + str(seqcutoff))
    writelog(PathtoOutput, 'colcutoff = ' + str(colcutoff))
    writelog(PathtoOutput, 'rescutoff = ' + str(rescutoff))
    writelog(
        PathtoOutput, 'concatAlignment = ' + concatAlignment +
        ' (y = remove paralogs and make alignment, n = keep paralogs and do not make alignment)'
    )
    writelog(PathtoOutput, 'majorClades = ' + str(majorClades))

    # MACR -- V4 -- Added this method here for cleaning intermediary files and logs (for instance, after incomplete run or forced stoppage) using phylotol
    if clean == "y":
        Utilities.cleaner(testPipelineList, PathtoFiles, PathtoOutput)
        print("cleaning folders -- done!")
        quit()

    # MACR 03/04/19 -- added this for calculating og average length for OF and SF
    oglengths = open(PathtoOutput + "oglengths", "a")
    ogs = open(PathtoFiles + "/" + testPipelineList, "r").readlines()
    for og in ogs:
        og = og.strip()
        seq_len = {}
        ogFile = open(PathtoFiles + "/allOG5Files/" + og, "r").readlines()

        for line in ogFile:
            line = line.strip()

            if line.startswith(">"):
                tag = line
                seq_len[tag] = 0
            else:
                seq_len[tag] += len(line)

        og_totalLength = 0
        for seqLength in seq_len.values():
            og_totalLength += seqLength
        averageLength = og_totalLength / len(seq_len.values())
        oglengths.write("%s\t%s\n" % (og, averageLength))
    oglengths.close()
    '''
	MACR - Taxon step with changes for Pipeline 3

	The next part of the code calls the Taxon class. The aim is to generate the folder fasta2keep. That folder
	contains sequences from non-orthomcl taxa categorized as OGs. Here there are modifications to process only the
	non-orthomcl taxa that are included in the user's list of taxa of interest.
	'''

    for f in os.listdir(PathtoFiles + '/ncbiFiles'):

        # MACR - Pipeline 3: the user provides a list of taxa of interest, so only take the Blast reports of the taxa that match the list

        if taxa2analyze != 'all':
            taxonBlast = f[:10]
            if taxonBlast in taxa2analyze:
                print('\n' + f + '\n')
                if f[0] != '.':
                    try:
                        newPipe = Pipeline(PathtoFiles + testPipelineList,
                                           PathtoFiles, ('queueTaxa', f),
                                           paramList, taxa2analyze, taxa2SF,
                                           majorClades, mode)
                    except Exception as e:
                        elog = open('errorlog', 'a')
                        elog.write(f + " failed on %s with: %s" % (f, e))
                        elog.close()
                        print("failed on %s with: %s" % (f, e))
        else:

            # MACR - Pipeline 3: the user did not provide a list of taxa of interest, so run the pipeline for all taxa

            print('\n' + f + '\n')
            if f[0] != '.':
                try:
                    newPipe = Pipeline(PathtoFiles + testPipelineList,
                                       PathtoFiles, ('queueTaxa', f),
                                       paramList, taxa2analyze, taxa2SF,
                                       majorClades, mode)
                except Exception as e:
                    elog = open('errorlog', 'a')
                    elog.write(f + " failed on %s with: %s" % (f, e))
                    elog.close()
                    print("failed on %s with: %s" % (f, e))
    '''
	JG - Pipeline 3

	Run the Gene step - just up to Guidance.
	To do this, I made a new pipeline method (two, actually) called 'test_keep' and 'test_remove'.
	keep and remove are for ingroup paralogs, as I wasn't sure which you wanted.

	MACR - Pipeline 3

	Besides the changes described by Jessica above, there are 3 major changes for pipeline 3:
	- A new Guidance method: this is a bash script made by Miguel Fonseca.
	- A "helper" for the new Guidance method. This helper is a perl script that edits the intermediary
	files of each Guidance run in order to allow the looping.
	- A new module called Utilities. I made this module to take the preguidance files and run Miguel's
	scripts. Once Guidance and RAxML are done, some functions of the module continue to produce the alignments
	for concatenation. Finally it organizes all output files.

	MACR - Pipeline 3.1

	- The Needle step and ingroup paralog removal were replaced. Now we have an overlap filter (OF) and a similarity filter (SF). Both are performed using
	Usearch-Ublast. We removed this step from the taxon class and added it as a separate script called 'iterUblast.py'. This
	script is called from the module 'Utilities'.

	The logic now is: all sequences per taxon should be 1.5 times smaller than the average OG length and pass the overlap filter. Then the user specifies
	whether they want to run a similarity filter on the sequences.
	'''

    os.system('mkdir ' + PathtoFiles + '/FileLists_' + testPipelineList)
    count = 0
    li = []
    for line in infile:
        if 'OG' in line:
            outfile = open(
                PathtoFiles + '/FileLists_' + testPipelineList + '/list' +
                str(count), 'w')
            li.append('list' + str(count))
            outfile.write(line)
            outfile.close()
            count = count + 1

    # MAC - for pipeline 3.1 all methods for gene step were replaced by this one
    try:
        for f in os.listdir(PathtoFiles + '/FileLists_' + testPipelineList):
            newPipe = Pipeline(PathtoFiles + '/' + testPipelineList,
                               PathtoFiles, ('geneStep', f), paramList,
                               taxa2analyze, taxa2SF, majorClades, mode)

        ### By inactivating the next line you can have access to all intermediary files
        Utilities.cleaner(testPipelineList, PathtoFiles, PathtoOutput)

    except Exception as e:
        elog = open('errorlog', 'a')
        line = open(PathtoFiles + '/FileLists_' + testPipelineList + '/' + f,
                    'r').read()
        elog.write(line + " failed on %s with: %s" % (f, e))
        elog.close()
        print("failed on %s with: %s" % (f, e))
    return True
Example #24
    },
    "TRAINING": True,
    "TRAINING_CONFIG": {
        "TYPE": "evolutionary",
        "TASK": "",
        "TIME": "10s",
        "PREDICTED_COLUMN_NAME": "Survived"
    }
}


def print_stats(d):
    print("Stats {}".format(d))


pipeline = Pipeline(config=config)

# fit the data to the pipeline
model = pipeline.fit(
    data,
    verbose=False,
    training_callbacks=[
        # EvolutionaryFeedback(print_stats),
        # PipelineFeedback(print_stats),
        # ModelTriedCallback(print_stats),
    ])
# summary = model.summary()
# with open('summary.json', 'w') as outfile:
#     json.dump(summary, outfile)
# # save the model for further reuse
# print(summary.get("BEST_MODEL"))
Example #25
    li = []
    for line in infile:
        if 'OG5_' in line:
            outfile = open(
                PathtoFiles + '/FileLists_' + testPipelineList + '/list' +
                str(count), 'w')
            li.append('list' + str(count))
            outfile.write(line)
            outfile.close()
            count = count + 1

    # MAC - for pipeline 3.1 all methods for gene step were replaced by this one
    try:
        for f in os.listdir(PathtoFiles + '/FileLists_' + testPipelineList):
            newPipe = Pipeline(PathtoFiles + '/' + testPipelineList,
                               PathtoFiles, ('geneStep', f), paramList,
                               taxa2analyze, taxa2SF, wholegenomeDB, mode)

        answer_Cleaner = ''
        valid_answers = ['y', 'n']

        if ct == 'y':
            answer_Cleaner = 'y'
        else:
            while (answer_Cleaner not in valid_answers):
                answer_Cleaner = input(
                    "\n\nDo you want to execute the cleaner? (y/n): ")

                if (answer_Cleaner not in valid_answers):
                    print("\n\nplease answer y or n")
Example #26
insert_document_service_uri = 'http://localhost:3020/api/insertFromPipe'
send_task_service_uri = 'http://localhost:3020/api/sendPipeTask'
logfilename = 'PipelineLog.txt'

ORGANIZATION = 'uic'
GROUPNAME = 'uic'

config = {
    'chromedriver_path': chromedriver_path,
    'xpdf_pdftohtml_path': xpdf_pdftohtml_path,
    'imagemagick_convert_path': imagemagick_convert_path,
    'figsplit_url': figsplit_url,
    'insert_document_service_uri': insert_document_service_uri,
    'send_task_service_uri': send_task_service_uri,
    'organization': ORGANIZATION,
    'groupname': GROUPNAME,
    'logfilename': logfilename
}

input_folder = abspath(
    join(current_folder, '..', '..', 'input', 'pipeline_input'))
output_folder = abspath(
    join(current_folder, '..', '..', 'output', 'pipeline_output'))
#input_document_path = join(input_folder, '15350224.pdf')

p = Pipeline(config)
input_documents = listdir(input_folder)
for input_doc in input_documents:
    input_document_path = join(input_folder, input_doc)
    result = p.process_file(input_document_path, output_folder)
Example #27
 def test_sum_var1_group_by(self):
     result = Pipeline(GroupBy("Cat"), Sum("Var1")).apply(self.df)["Var1_Sum"]
     self.assertEqual(result[0], 1147)
     self.assertEqual(result[1], 1489)
     self.assertEqual(result[2], 909)
Example #28
import signal
import time

from Pipeline import Pipeline
from components.all import MatrixSource, AuxSink


#TODO config file to define pipeline structure along with rate, frames per buffer
if __name__ == "__main__":
  rate = 48000
  fpb = 128
  
  # create the audio pipeline; just echoes input from MATRIX Voice for now
  audio_pipeline = Pipeline()
  
  audio_pipeline.add(MatrixSource(rate=rate, frames_per_buffer=fpb))
  audio_pipeline.add(AuxSink(rate=rate, frames_per_buffer=fpb))
  
  #start the audio pipeline
  audio_pipeline.start()
  
  print("Pipeline started...")
  
  
  #set up interrupt handling
  def interrupt_handler(signum, sigframe):
    audio_pipeline.stop()
    print("Pipeline stopped!")
    exit(0)
  signal.signal(signal.SIGINT, interrupt_handler)
  
Example #29
def export_all(cdap_instance):
    ''' Export All Pipelines '''
    click.echo(click.style('Exporting All Pipeline(s)', fg='green', bold=True))
    p = Pipeline(cdap_instance)
    p.connect()
    p.export()