Esempi in Python per Pipeline, esempi in Python per pipeline.pipeline.Pipeline

Esempio n. 1

0

Mostra file

def main():
    """Run the pipeline once on the test karyotype image.

    The image path, the model path and the ``save_dir`` value are fixed,
    relative paths handed straight to ``Pipeline.run``.
    """
    Pipeline.run(
        image_file="../data/test/karyotype.bmp",
        save_dir="../data",
        model_path="../model/default_inference.h5",
    )

Esempio n. 2

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_detect_interesting_points(self):
     """Run extraction, straightening and interesting-point detection on the test image."""
     karyotype = image_utils.read_image(data_dir + "/test/karyotype.bmp")
     extracted = Pipeline.extract_chromosomes(karyotype)
     straightened = Pipeline.straighten_chromosomes(extracted)
     # The result is discarded: the test only checks that detection completes.
     Pipeline.detect_interesting_points(straightened, verbose=True)

Esempio n. 3

0

Mostra file

File: test_command.py Progetto: hamster-dev/pipeline

def test_shell_command_exit():
    """Check that the return code of a single shell command is reported.

    Schedules one ``exit 1`` shell command and expects return code 1 in the
    result stored under the action's name.
    """
    exit_action = TaskAction("shell_command", name="exiter", commands=["exit 1"])
    pipeline = Pipeline([exit_action])
    dummy = DummySource()

    outcome = pipeline.schedule(dummy).get()
    assert outcome.results["exiter"].returncode == 1

Esempio n. 4

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_organize_chromosomes(self):
     """Drive the pipeline from image reading to the organized karyotype and show it."""
     source_image = Pipeline.read_image(data_dir + "/test/karyotype.bmp")
     extracted = Pipeline.extract_chromosomes(source_image)
     straightened = Pipeline.straighten_chromosomes(extracted)
     # Interesting-point detection is not run here; classification receives
     # None in its place.
     classified = Pipeline.classify_chromosomes(straightened, None)
     karyogram = Pipeline.organize_chromosomes(classified)
     image_utils.show_image(karyogram)

Esempio n. 5

0

Mostra file

def test_pipeline_produced_expected_data() -> None:
    """Run the pipeline on the example file and compare its output to the expected dataset.

    ``delete_existing_outputs(STORAGE_CONFIG)`` is called before the run.
    The test fails through ``assert`` / ``xr.testing.assert_allclose`` and
    returns nothing (the previous ``-> bool`` annotation was inaccurate).
    """
    delete_existing_outputs(STORAGE_CONFIG)

    filename = os.path.basename(EXPECTED_FILE)
    pipeline = Pipeline(PIPELINE_CONFIG, STORAGE_CONFIG)
    pipeline.run(EXAMPLE_FILE)

    # Expected output location: <storage root>/<location id>/<datastream>/<filename>.
    loc_id = pipeline.config.pipeline_definition.location_id
    datastream = DSUtil.get_datastream_name(config=pipeline.config)
    root: str = pipeline.storage._root
    output_file = os.path.join(root, loc_id, datastream, filename)

    # The run must have produced a file with the same basename as the expected file.
    assert os.path.isfile(output_file)

    # Open both datasets as context managers so their file handles are
    # released even when the comparison raises.
    with xr.open_dataset(output_file) as ds_out, \
            xr.open_dataset(EXPECTED_FILE) as ds_exp:
        xr.testing.assert_allclose(ds_out, ds_exp)

Esempio n. 6

0

Mostra file

def main():
    """Build the fixed request and the step list, then run the pipeline."""
    request = {'channel_id': CHANNEL_ID, 'search_word': 'incredible', 'limit': 20}

    # One step per line for readability (a trailing comma after the last
    # entry is recommended).
    stages = [
        Preflight(),
        GetVideoList(),
        InitializeYT(),
        DownloadCaptions(),
        ReadCaption(),
        Search(),
        DownloadVideos(),
        EditVideo(),
        Postflight(),
    ]

    helpers = Utils()
    Pipeline(stages).run(request, helpers)

Esempio n. 7

0

Mostra file

File: test_command.py Progetto: hamster-dev/pipeline

def test_source_acquired():
    """Check that a shell command runs against an acquired source.

    Per the original author's note, acquiring the source installs
    flake8-diff as part of its acquisition instructions; the command greps
    ``pip freeze`` for it and is expected to exit with status 0.
    """
    install_check = TaskAction(
        "shell_command",
        name="installer",
        commands=["pip freeze |grep flake8-diff"],
        # Workspace options currently left disabled:
        # workspace= 'python3',
        # workspace_kwargs= {"delete": False},
    )
    pipeline = Pipeline([install_check])
    dummy = DummySource()

    outcome = pipeline.schedule(dummy).get()
    assert outcome.results["installer"].returncode == 0

Esempio n. 8

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_straighten_chromosomes(self):
     """Extract chromosomes from the test image and straighten them with ``debug=True``."""
     karyotype = image_utils.read_image(data_dir + "/test/karyotype.bmp")
     extracted = Pipeline.extract_chromosomes(karyotype)
     # The straightened result is discarded; only completion is exercised.
     Pipeline.straighten_chromosomes(extracted, debug=True)

Esempio n. 9

0

Mostra file

File: __main__.py Progetto: Crop-Phenomics-Group/AirSurf-Wheat

from pipeline.pipeline import Pipeline
# from interface.interface import Interface

# Pipeline.getInstance().run_pipeline(".", img_path="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Early_19/19_05_29/DFW_Early_190529_transformed_small.png",hmap_path="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Early_19/19_05_29/DFW_Early_190529Height_Map_trans.png")
# Pipeline.getInstance().run_pipeline(".", parent_dir="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Early_19", seg_path="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Early_19/19_06_05")
# Pipeline.getInstance().run_pipeline(".", parent_dir="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Mid_19", seg_path="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Mid_19/19_05_29")
# NOTE(review): the paths below are absolute and machine-specific.
Pipeline.getInstance().run_pipeline(
    ".",
    parent_dir="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Late_19",
    seg_path="/Users/bauera/work/airsurf/wheat/DFW_images/DFW_Late_19/19_05_29",
)
# Interface.getInstance().run()

Esempio n. 10

0

Mostra file

    # Parse the command line, prepare libraries from it, and fold the
    # arguments into a fresh Settings object.
    args = parse_args()
    prepare_libraries(args)
    settings = Settings()
    settings.update(args)
    # Set up logging at INFO level for this module's logger.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    log_file = settings.get("log_file")
    # A missing/empty log file setting or the literal "NONE" sends log
    # output to stdout; anything else is used as the log file path.
    if not log_file or log_file == "NONE":
        handler = logging.StreamHandler(sys.stdout)
    else:
        handler = logging.FileHandler(settings.get("log_file"))
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # Store the logger in the settings under the "logger" key.
    settings.set("logger", logger)
    # Log every setting except std_dev, ins_size and pair_mode.
    logger.info("**--**--**--**--**--**--**--**--**--**--**--**--**--**--**--**--**")
    logger.info("Settings used for this run of ScaffMatch are:")
    # NOTE(review): iteritems() is the Python 2 dict idiom; confirm that
    # Settings provides it if this runs under Python 3.
    for s, v in settings.iteritems():
        if s in ["std_dev", "ins_size", "pair_mode"]:
            continue
        logger.info("    %s  -- %s" % (s, v)) 
    # Feed the settings to the scaffolder pipeline
    scaffolder = Pipeline()
    scaffolder.set_settings(settings)
    # Run the scaffolding step, then log completion.
    scaffolder.scaffold()
    logger.info("Done!")

Esempio n. 11

0

Mostra file

File: main.py Progetto: AbrahamSanders/gutenberg-dialog

def main():
    """Parse command-line options into a ``Config`` and run the pipeline.

    Every option defaults to the attribute of the same name on a fresh
    ``Config``. Parsed values are written back onto that same object
    (``parse_args(namespace=config)``), which is then handed to ``Pipeline``.
    """
    config = Config()
    parser = argparse.ArgumentParser(
        description='Code for building the Gutenberg Dialog Dataset')

    def add_value(short, name, description, value_type=int,
                  show_default=True):
        """Register a value-taking option whose default is ``config.<name>``."""
        help_text = description
        if show_default:
            help_text += ' (default: %(default)s)'
        parser.add_argument(short,
                            '--' + name,
                            default=getattr(config, name),
                            help=help_text,
                            metavar='',
                            type=value_type)

    def add_flag(short, name, description):
        """Register a ``store_true`` switch whose default is ``config.<name>``."""
        parser.add_argument(short,
                            '--' + name,
                            default=getattr(config, name),
                            help=description,
                            action='store_true')

    # Registration order is kept from the original so ``--help`` lists the
    # options in the same sequence.
    add_value('-dg', 'dialog_gap',
              'Min. number of characters between two dialogs')
    add_flag('-isn', 'include_surrounding_narratives',
             'Whether to include surrounding narratives in the output dataset')
    add_value('-mnl', 'max_narrative_length',
              'Max. number of words in 1 narrative')
    add_value('-minl', 'min_intermediate_narrative_length',
              'Min. number of words in 1 intermediate narrative '
              '(a narrative which occurs in-line with dialog)')
    add_value('-mul', 'max_utterance_length',
              'Max. number of words in 1 utterance')
    add_value('-mb', 'max_books',
              'Limit the number of books in final dataset')
    add_value('-md', 'min_delimiters',
              'Min delimiters / 10000 words needed in a book')
    # The help text previously lacked the closing parenthesis.
    add_value('-mdd', 'min_double_delim',
              'Double delimiter threshold (romance languages)')
    # A divergence threshold is real-valued; float still accepts the
    # integer spellings that type=int accepted.
    add_value('-kl', 'kl_threshold',
              'KL divergence threshold for filtering books',
              value_type=float)
    # The help text previously ran straight into "(default: ...)" without
    # a separating space.
    add_value('-st', 'size_threshold',
              '#words threshold for filtering with KL')
    add_flag('-cd', 'clean_dialogs',
             'Whether to run pre-processing on dialogs')
    # A ratio cannot be expressed with type=int: fractional values such as
    # 0.2 were rejected by the parser.
    add_value('-vt', 'vocab_threshold',
              'Ratio of unknown words allowed in a dialog',
              value_type=float)
    add_value('-l', 'languages',
              'Comma separated language codes for which to build datasets',
              value_type=str, show_default=False)
    add_flag('-d', 'download', 'Whether to run download step')
    add_flag('-f1', 'pre_filter', 'Whether to run pre-filter step')
    add_flag('-e', 'extract', 'Whether to run extracting step')
    add_flag('-f2', 'post_filter', 'Whether to run post filter step')
    add_flag('-c', 'create_dataset', 'Whether to run create dataset step')
    add_flag('-a', 'run_all', 'Whether to run all steps')
    add_value('-dir', 'directory',
              'Directory where the language folders are',
              value_type=str, show_default=False)

    # Parsed values are stored directly on the config object.
    parser.parse_args(namespace=config)
    p = Pipeline(config)
    p.run()

Esempio n. 12

0

Mostra file

File: main.py Progetto: elieser1101/loglizer

# Put the LenMa parser package directory on the import path
# (needed for LenMa's __init__.py, per the original note).
sys.path.append(repo_path +
                '/logparser/logparser/LenMa/')
# Also expose LenMa's templateminer directory on the import path.
sys.path.append(repo_path +
                '/logparser/logparser/LenMa/templateminer')
# Imported only after the sys.path additions above.
from pipeline.pipeline import Pipeline

input_dir = repo_path + '/'  # Directory containing the input log file
output_dir = repo_path + '/'  # Directory that receives the parsing results
log_file = 'dayco_log.log'  # Name of the input log file
# Field layout of one log line (dayco/rsyslog format).
log_format = '<smonth> <sday> <shour> <ip> <id> <id2> <month> <day> <hour> <city> <type> <Content>'

# NOTE(review): the keyword is spelled `log_analizer_algorithm`; presumably
# this matches the Pipeline signature, so it is left untouched. Confirm.
pipeline = Pipeline(parser_algorithm='drain',
                    input_dir=input_dir,
                    parser_output_dir=output_dir,
                    log_file=log_file,
                    parser_regex=log_format,
                    feature_extractor='fixed_window',
                    log_analizer_algorithm='mining_invariants',
                    data_type='time_based',
                    elasticsearch_index_name='deepia')

para = {
    'path': repo_path + '/',  # directory for input data
    'log_file_name': 'dayco_log.log',  # filename for log data file
    'log_event_mapping':
    'dayco_log.logTemplateMap.csv',  # filename for log-event mapping. A list of event index, where each row represents a log
    'save_path':
    './time_windows/',  # dir for saving sliding window data files to avoid splitting
    #'select_column':[0,4],                      # select the corresponding columns (label and time) in the raw log file
    'select_column': [
        0, 1, 2

Esempio n. 13

0

Mostra file

File: hn_top_keywords_pipeline.py Progetto: vdymna/generic-python-pipeline

import json
import io
import csv
import string
from datetime import datetime

from pipeline.pipeline import Pipeline
from pipeline.csv_helper import CsvHelper

# Tokens to exclude, going by the name; the code that consumes this tuple is
# not part of this excerpt.
exclude_words = (
    'the', 'to', 'a', 'of', 'for', 'in',
    'and', 'is', '–', 'on', 'hn:', 'an',
    'at', 'not', 'with', 'why', 'how', 'your',
    'from', 'new', 'you', 'i', 'by', 'what',
    'my', 'are', 'as', 'that', 'we', 'it',
    'be', 'now', 'using', 'has',
)

# Module-level pipeline; its task() decorator is applied to the functions
# defined below.
pipeline = Pipeline()
# Module-level CsvHelper instance (its use is outside this excerpt).
csv_helper = CsvHelper()


@pipeline.task()
def file_to_json():
    """Load ``hn_stories_2014.json`` and return its stories.

    Returns:
        The value stored under the ``'stories'`` key of the parsed JSON
        document.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the document has no ``'stories'`` key.
    """
    # JSON interchange text is UTF-8; decoding explicitly avoids depending
    # on the platform's default encoding.
    with open('hn_stories_2014.json', 'r', encoding='utf-8') as story_file:
        return json.load(story_file)['stories']


@pipeline.task(depends_on=file_to_json)
def filter_stories(stories):
    def is_popular(story):
        return story['points'] > 50 and story[
            'num_comments'] > 1 and not story['title'].startswith('Ask HN')

Esempio n. 14

0

Mostra file

File: run_pipeline.py Progetto: brianlochanan/pulsar-python

"""
    Script for running the pipeline
"""
#pylint: disable-all
import os, sys, inspect

# Resolve this script's directory and put its parent on sys.path so the
# `pipeline` package import below can be resolved from there.
CURRENT_DIR = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
PARENT_DIR = os.path.dirname(CURRENT_DIR)
sys.path.insert(0, PARENT_DIR)
from pipeline.pipeline import Pipeline

# Absolute path of the filterbank file, resolved against the current
# working directory.
fil_name = os.path.abspath("./pspm32.fil")
# Filterbank sample size, passed as `size` and as `n` below.
sample_size = 192
# Number of times each pipeline variant is run.
n_times = 10

# Run all three pipeline variants n_times times.
for i in range(n_times):
    # Static read (per the original note).
    Pipeline(filename=fil_name, size=sample_size)
    # Stream read, row per row (per the original note).
    Pipeline(filename=fil_name, as_stream=True)
    # Stream read, sample_size rows at a time (per the original note).
    Pipeline(filename=fil_name, as_stream=True, n=sample_size)

Esempio n. 15

0

Mostra file

File: mom_strategy.py Progetto: CBA222/Python-Trading-Engine

    def make_pipeline(self):
        """Return a pipeline named ``my_pipeline`` with one ``returns`` factor.

        The factor is ``Returns(window_length=150)``.
        """
        pipeline = Pipeline('my_pipeline')
        returns_factor = Returns(window_length=150)
        pipeline.add_factor('returns', returns_factor)
        return pipeline

Esempio n. 16

0

Mostra file

 def __init__(
     self, pipeline_name="default", input_file="input.mp4", runtime_config=None
 ):
     """Initialise the pipeline from the preset registered under *pipeline_name*.

     ``update_preset_pipelines`` is called first with the input file and
     runtime configuration; the preset is looked up afterwards.
     """
     update_preset_pipelines(runtime_config=runtime_config, input_file=input_file)
     selected_preset = preset_pipelines[pipeline_name]
     Pipeline.__init__(self, selected_preset)

Esempio n. 17

0

Mostra file

# Plugin source for the `pipeline.modules` package, searched under
# ./pipeline/modules.
plugin_base = PluginBase(package='pipeline.modules')
modules = plugin_base.make_plugin_source(searchpath=['./pipeline/modules'])


def setup_workspace():
    """Create the workspace, log and output directories if they are missing."""
    for location in (workspace_location, log_location, output_location):
        os.makedirs(location, exist_ok=True)


def fake_pipeline():
    """Copy the ``.pipeline`` file in the current directory to ``pipeline_file``.

    Both files are opened through context managers so the handles are closed
    (and the written data flushed) as soon as the copy finishes, instead of
    whenever the file objects happen to be garbage-collected.

    Raises:
        OSError: If ``.pipeline`` cannot be read or ``pipeline_file`` cannot
            be written.
    """
    with open(".pipeline") as template, open(pipeline_file, "w+") as target:
        target.writelines(template)


def setup_docker():
    """Return a Docker client built by ``docker.from_env()``.

    ``docker`` is imported inside the function, so it is only required when
    this function is actually called.
    """
    from docker import from_env
    return from_env()


def test():
    """Run ``setup_workspace`` and then ``fake_pipeline``."""
    for prepare in (setup_workspace, fake_pipeline):
        prepare()


if __name__ == "__main__":
    # Prepare the workspace and pipeline file before building the pipeline.
    test()
    docker_client = setup_docker()
    # The pipeline is built from the pipeline file, the Docker client and
    # the plugin source.
    pipeline = Pipeline(pipeline_file, docker_client, modules)

Esempio n. 18

0

Mostra file

File: main.py Progetto: charlie-sheils/ml_final_project

import pickle

import pandas as pd

from pipeline.pipeline import Pipeline

# Read the dataframe from its pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load files
# from a trusted source.
df = pd.read_pickle("./datasets/h1b_2019.pkl")

# Load the dataframe into the pipeline and split it, passing "CASE_STATUS"
# (presumably the target column; see the TODO below).
pl = Pipeline()
pl.load_data(df)
pl.train_test_split("CASE_STATUS")  # TODO: redo to set_target

Esempio n. 19

0

Mostra file

from pipeline.add_timestamp import AddTimestamp
from pipeline.write_json import WriteJSON
from pipeline.write_sitemaps import WriteSitemaps

if __name__ == "__main__":
    # Command line: the vcpkg folder (required) and an output directory
    # (optional, defaults to the current directory).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'SourceDirectory',
        help="location of the vcpkg folder",
        type=dir_path,
    )
    parser.add_argument(
        '-o',
        default="./",
        help="output of the JSON file generated",
        type=dir_path,
    )
    args = parser.parse_args()

    # Input locations inside the vcpkg folder.
    ports_path = os.path.join(args.SourceDirectory, "ports")
    triplets_path = os.path.join(args.SourceDirectory, "triplets")
    baseline_path = os.path.join(
        args.SourceDirectory, "scripts/ci.baseline.txt")
    version_path = os.path.join(args.SourceDirectory, "versions")
    # Output location for the generated JSON.
    data_out_path = os.path.join(args.o, "data")

    # Stages are passed to Pipeline in this order.
    pipeline = Pipeline(
        ReadPackages(ports_path),
        AddUsage(ports_path),
        AddTriplets(triplets_path),
        AddStatus(baseline_path),
        AddVersion(version_path),
        AddTimestamp(),
        WriteJSON(data_out_path, "libs.json"),
        WriteSitemaps(args.o, "sitemap.txt"),
    )

    pipeline.run()

Esempio n. 20

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_generate_chromosome_cluster(self):
     """Show the test image, then the chromosome cluster generated from it."""
     karyotype = image_utils.read_image(data_dir + "/test/karyotype.bmp")
     image_utils.show_image(karyotype)
     cluster = Pipeline.generate_chromosome_cluster(karyotype)
     image_utils.show_image(cluster, cmap=None)

Esempio n. 21

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_read_image(self):
     """Read the test karyotype image through ``Pipeline.read_image`` and show it."""
     loaded = Pipeline.read_image(data_dir + "/test/karyotype.bmp")
     image_utils.show_image(loaded)

Esempio n. 22

0

Mostra file

File: test_pipe_line.py Progetto: lntk/karyotyping

 def test_extract_chromosomes(self):
     """Extract chromosomes from the test image and show each one."""
     karyotype = image_utils.read_image(data_dir + "/test/karyotype.bmp")
     for extracted in Pipeline.extract_chromosomes(karyotype):
         image_utils.show_image(extracted, cmap=None)

Esempio n. 23

0

Mostra file

File: csv_to_postgres_pipeline.py Progetto: vdymna/generic-python-pipeline

import io
from urllib import request
import csv
import psycopg2

from pipeline.pipeline import Pipeline

# URL of the storm-data CSV file.
DATA_FILE_URL = 'https://dq-content.s3.amazonaws.com/251/storm_data.csv'

# PostgreSQL connection settings; the empty values must be filled in
# before the connection task can succeed.
DB_HOST = 'localhost'
DB_NAME = ''  # set database name
DB_USER = ''  # set database user name
DB_PASSWORD = ''  # set database user password

# Module-level pipeline; its task() decorator is applied to the functions
# defined below.
pipeline = Pipeline()


@pipeline.task()
def create_db_connection():
    """Open and return a psycopg2 connection built from the ``DB_*`` settings."""
    connection_settings = {
        'host': DB_HOST,
        'database': DB_NAME,
        'user': DB_USER,
        'password': DB_PASSWORD,
    }
    return psycopg2.connect(**connection_settings)


@pipeline.task(depends_on=create_db_connection)
def create_db_tables(db_conn):
    """Create database tables for staging and final data."""
    cursor = db_conn.cursor()

Esempio n. 24

0

Mostra file

import csv
import io
import json
import string
from collections import Counter
from datetime import datetime
from pprint import pprint
from pytz import timezone

from pipeline.pipeline import Pipeline, build_csv
from pipeline.stop_words import stop_words

# Module-level pipeline; its task() decorator is applied to the functions
# defined below.
pipeline = Pipeline()


def __get_start_end_dates(year):
    """Return the start and end Unix timestamps of *year*, in UTC.

    The start is midnight UTC on 1 January of ``year``; the end is midnight
    UTC on 1 January of the following year.
    """
    utc_zone = timezone("UTC")
    boundaries = (datetime(year, 1, 1), datetime(year + 1, 1, 1))
    start, end = (
        utc_zone.localize(moment).timestamp() for moment in boundaries
    )
    return start, end


# TODO currently we are only getting 1 page, need iterate through pages to get full dataset
@pipeline.task()
def get_data_from_hacker_news(year=2014):
    import requests
    url = "http://hn.algolia.com/api/v1/search_by_date"
    start, end = __get_start_end_dates(year)
    query = {