Example #1
def test_client():
    c = v3f.Client('localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.gRPCClient), 'default is not grpc'
    c = v3f.Client('grpc://localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.gRPCClient), 'not gRPC'
    c = v3f.Client('http://localhost:8081', should_check_version=False)
    assert isinstance(c, v3f.HTTPClient), 'not HTTP'
Example #2
def test_client():
    c = v3f.Client('localhost:8081')
    assert isinstance(c, v3f.gRPCClient), 'default is not grpc'
    c = v3f.Client('grpc://localhost:8081')
    assert isinstance(c, v3f.gRPCClient), 'not gRPC'
    c = v3f.Client('http://localhost:8081')
    assert isinstance(c, v3f.HTTPClient), 'not HTTP'
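Examples #1 and #2 show that v3f.Client picks its transport from the address scheme: a bare host:port or a grpc:// prefix yields a gRPCClient, while an http:// prefix yields an HTTPClient. A minimal usage sketch of that dispatch (the framesd address below is a placeholder, not part of the original tests):

import v3io_frames as v3f

# The scheme selects the transport: gRPC is the default,
# HTTP is used when the address carries an http:// prefix.
grpc_client = v3f.Client('framesd:8081')         # defaults to gRPC
http_client = v3f.Client('http://framesd:8080')  # HTTP transport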
Example #3
def handler(context, event):
    kv_table_path = 'iguazio/demos/face-recognition/artifacts/encodings'
    user_name = 'iguazio'
    password = '******'
    client = v3f.Client("framesd:8081",
                        user=user_name,
                        password=password,
                        container="users")
    df = client.read(backend='kv', table=kv_table_path, reset_index=True)
    context.logger.info(df.head())
    df2 = df[['fileName', 'camera', 'label', 'imgUrl']]
    options = ['unknown']
    df3 = df2[df2.fileName.str.startswith(tuple(options))]
    for idx in range(len(df3)):
        img_url = df3.iloc[idx]['imgUrl']
        parts = img_url.split("/")
        destination = "/".join((parts[0], parts[1], parts[2], parts[3],
                                "dataset/label_pending"))
        print(img_url)
        print(idx)
        print(parts)
        print(destination)
        # Move the image from its source path to the destination directory
        dest = shutil.move(img_url, destination)
Example #4
def init_context(context):
    # IGZ variables
    igz_v3f = os.getenv('IGZ_V3F')
    igz_v3f_port = os.getenv('IGZ_V3F_PORT')

    # MYSQL variables
    host = os.getenv('SQL_HOST')
    port = os.getenv('SQL_PORT')
    user = os.getenv('SQL_USER')
    password = os.getenv('SQL_PWD', "")
    database = os.getenv('SQL_DB_NAME')

    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=f'{igz_v3f}:{igz_v3f_port}',
                        password=os.getenv('IGZ_PWD'))
    setattr(context, 'client', client)

    # Init DB connection and set it as a context attribute
    dbconn = pymysql.connect(host=host,
                             port=int(port),
                             user=user,
                             passwd=password,
                             db=database,
                             charset='utf8mb4')
    setattr(context, 'dbconn', dbconn)
Example #5
def test_client_env():
    url = 'localhost:8080'
    data = json.dumps({'url': url})
    with setenv(v3f.SESSION_ENV_KEY, data):
        c = v3f.Client('localhost:8081')

    assert c.session.url == url, 'missing URL from env'
Example #6
def test_concurrent(framesd, protocol):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    c = v3f.Client(addr)
    start = monotonic()
    with ThreadPoolExecutor() as pool:
        for i in range(7):
            pool.submit(reader, i, 5, c)
    duration = monotonic() - start
    print('duration: {:.3f}sec'.format(duration))
Example #7
def read_encodings_table(params):
    client = v3f.Client(address=params.frames_url,
                        token=params.token,
                        container=params.container)
    encoding_df = client.read(backend="kv",
                              table=params.encodings_path,
                              reset_index=False,
                              filter='label != -1')
    return encoding_df
Example #8
def train(context, processed_data, model_name='model.bst'):

    device = torch.device("cpu")
    context.logger.info('Client')
    client = v3f.Client('framesd:8081', container="users")
    with open(processed_data.url, 'r') as f:
        t = f.read()

    context.logger.info('Loading dataset')
    data_df = client.read(backend="kv",
                          table=t,
                          reset_index=False,
                          filter='label != -1')
    X = data_df[['c' + str(i).zfill(3) for i in range(128)]].values
    y = data_df['label'].values

    n_classes = len(set(y))

    X = torch.as_tensor(X, device=device)
    y = torch.tensor(y, device=device).reshape(-1, 1)

    input_dim = 128
    hidden_dim = 64
    output_dim = n_classes

    context.logger.info('Preparing model architecture')
    spec = importlib.util.spec_from_file_location('models', MODELS_PATH)
    models = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(models)

    model = models.FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)
    model.to(device)
    model = model.double()

    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.05
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    dataset = data.TensorDataset(X, y)
    train_loader = data.DataLoader(dataset)

    context.logger.info('Starting training process')
    for epoch in range(20):
        for features, target in train_loader:
            optimizer.zero_grad()
            out = model(features)
            loss = criterion(out, target[0])
            loss.backward()
            optimizer.step()

    context.logger.info('Save model')
    with open(model_name, 'wb') as f:
        dump(model._modules, f)
    context.log_artifact('model',
                         src_path=model_name,
                         target_path=model_name,
                         labels={'framework': 'Pytorch-FeedForwardNN'})
    os.remove(model_name)
Example #9
def create_streams_v1alpha1(project_graph=''):
    for stream in project_graph['project']['v3io_streams']:
        try:
            client = v3f.Client("framesd:8081", container=stream['container'])
            client.create("stream",
                          table=stream['path'],
                          shards=stream['shards'],
                          retention_hours=stream['retention'],
                          if_exists=0)
        except Exception:
            print("Failed to create stream", stream)
            raise
Example #10
def test_timestamp(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)

    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    df = pd.DataFrame({'birthday': [pd.Timestamp('1940-04-25', tz='Asia/Dubai'), pd.Timestamp('1940-04-25', tz='US/Pacific'), None, pd.Timestamp('1940-04-25')]})
    client.write(backend, table=tableName, dfs=df)

    df = client.read(backend, table=tableName)

    client.delete(backend, tableName)
Example #11
def create_streams_v0_1(project_graph=''):
    _streams = project_graph['project']['v3io_streams']
    for stream in _streams.keys():
        try:
            client = v3f.Client("framesd:8081",
                                container=_streams[stream]['container'])
            client.create("stream",
                          table=_streams[stream]['path'],
                          shards=_streams[stream]['shards'],
                          retention_hours=_streams[stream]['retention'],
                          if_exists=0)
        except Exception as e:
            print("Failed to create stream", stream, e)
            raise
Example #12
def init_context(context):
    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=IGZ_V3F,
                        user=IGZ_USER,
                        password=IGZ_PWD,
                        container=CONTAINER)
    setattr(context, 'client', client)

    connection_string = f"mysql://{SQL_USER}:{SQL_PWD}@{SQL_HOST}:{SQL_PORT}/{SQL_DB_NAME}"
    engine = create_engine(connection_string, encoding='utf8', convert_unicode=True, isolation_level='READ_COMMITTED')
    session = sessionmaker()
    session.configure(bind=engine)
    dbconn = session()
    setattr(context, 'dbconn', dbconn)
Example #13
def test_datetime(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)

    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    col = pd.Series([datetime.now(pytz.timezone("Africa/Abidjan")), datetime.now(pytz.timezone("America/Nassau")), None, datetime.now()])
    df = pd.DataFrame({'col': col})
    client.write(backend, table=tableName, dfs=df)

    df = client.read(backend, table=tableName)

    client.delete(backend, tableName)
Example #14
def test_client_wrong_params(proto, cls):
    address = '{}://localhost:8080'.format(proto)
    session_params = {
        'data_url': 'http://iguazio.com',
        'container': 'large one',
        'user': '******',
        'password': '******',
        'token': 'a quarter',
    }

    try:
        v3f.Client(address, should_check_version=False, **session_params)
    except ValueError:
        return
    raise AssertionError('expected ValueError but finished successfully')
Example #15
def test_client(proto, cls):
    address = '{}://localhost:8080'.format(proto)
    session_params = {
        'data_url': 'http://iguazio.com',
        'container': 'large one',
        'user': '******',
        'password': '******',
    }

    client = v3f.Client(address, should_check_version=False, **session_params)
    assert client.__class__ is cls, 'wrong class'
    for key, value in session_params.items():
        key = 'url' if key == 'data_url' else key
        assert getattr(client.session, key) == value, \
            'bad session value for {}'.format(key)
Example #16
def test_integration(framesd, session, protocol, backend):
    test_id = uuid4().hex
    size = 293
    table = 'integtest{}'.format(test_id)

    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)
    cfg = test_config.get(backend, {})
    df = cfg['df_fn'](size)

    create_kw = cfg.get('create', {})
    if create_kw is not None:
        client.create(backend, table, **create_kw)

    write_kw = cfg.get('write', {})

    labels = {}
    if backend == 'tsdb':
        labels = {
            'li': 17,
            'lf': 3.22,
            'ls': 'hi',
        }

    client.write(backend, table, [df], **write_kw, labels=labels)
    sleep(1)  # Let db flush

    read_kw = cfg.get('read', {})
    dfs = list(client.read(backend, table=table, iterator=True, **read_kw))
    df2 = pd.concat(dfs)

    if backend == 'tsdb':
        compare_dfs_tsdb(df, df2, backend)
    elif backend == 'stream':
        compare_dfs_stream(df, df2, backend)
    else:
        if backend == 'kv':
            # FIXME: Probably the schema
            df2.dropna(inplace=True)
        compare_dfs(df, df2, backend)

    df = client.read(backend, table=table, **read_kw)
    assert isinstance(df, pd.DataFrame), 'iterator=False returned generator'

    client.delete(backend, table)
    exec_kw = cfg.get('execute', {})
    if exec_kw is not None:
        client.execute(backend, table, **exec_kw)
Example #17
def test_cudf(framesd, session):
    df = cudf.DataFrame({
        'a': [1, 2, 3],
        'b': [1.1, 2.2, 3.3],
    })

    c = v3f.Client(framesd.grpc_addr, frame_factory=cudf.DataFrame)
    backend = 'csv'
    table = 'cudf-{}'.format(int(time()))
    print('table = {}'.format(table))

    c.write(backend, table, [df])
    sleep(1)  # Let db flush
    rdf = c.read(backend, table=table)
    assert isinstance(rdf, cudf.DataFrame), 'not a cudf.DataFrame'
    assert len(rdf) == len(df), 'wrong frame size'
    assert set(rdf.columns) == set(df.columns), 'columns mismatch'
Example #18
def test_kv_read_empty_df(framesd, session, protocol):
    backend = 'kv'
    test_id = uuid4().hex
    tableName = 'integtest{}'.format(test_id)

    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr, **session)

    index = [str(i) for i in range(1, 4)]
    df = pd.DataFrame(data={'col1': [i for i in range(1, 4)], 'col2': ['aaa', 'bad', 'cffd']}, index=index)
    client.write(backend, table=tableName, dfs=df, condition="starts({col2}, 'aaa') AND {col1} == 3")

    df = client.read(backend, table=tableName)
    assert df.to_json() == '{}'
    assert isinstance(df, pd.DataFrame), 'iterator=False returned generator'

    client.delete(backend, tableName)
Example #19
def init_context(context):
    v3io_client = v3io.dataplane.Client()
    setattr(context, "v3io_client", v3io_client)

    v3f_client = v3f.Client("framesd:8081", container="bigdata")
    setattr(context, "v3f", v3f_client)
    window = []
    setattr(context, "window", window)
    setattr(context, "window_size", int(os.getenv("window_size", 10)))
    setattr(context, "tsdb_table", os.getenv("tsdb_table", "concept_drift_tsdb_1"))
    try:
        context.v3f.create("tsdb", context.tsdb_table, rate="1/s", if_exists=1)
    except Exception as e:
        context.logger.info(f"Creating tsdb table with rate= kwarg failed: {e}")
        context.v3f.create(
            "tsdb", context.tsdb_table, attrs={"rate": "1/s"}, if_exists=1
        )

    callbacks = [callback.strip() for callback in os.getenv("callbacks", "").split(",")]
    setattr(context, "callbacks", callbacks)

    setattr(context, "drift_stream", os.getenv("drift_stream", "/bigdata/drift_stream"))
    try:
        create_stream(
            context, context.drift_stream, int(os.getenv("drift_stream_shards", 1))
        )
    except Exception:
        context.logger.info(f"{context.drift_stream} already exists")

    models = {}
    model_types = ["pagehinkely", "ddm", "eddm"]
    path_suffix = "_model_path"
    for model in model_types:
        model_env = f"{model}{path_suffix}"
        if model_env in os.environ:
            with open(os.environ[model_env], "rb") as f:
                models[model] = load(f)
    setattr(context, "models", models)

    setattr(context, "label_col", os.getenv("label_col", "label"))
    setattr(context, "prediction_col", os.getenv("prediction_col", "prediction"))
Example #20
def init_context(context):
    # MYSQL variables
    host = os.getenv('SQL_HOST')
    port = os.getenv('SQL_PORT')
    user = os.getenv('SQL_USER')
    password = os.getenv('SQL_PWD', "")
    database = os.getenv('SQL_DB_NAME')

    # Init v3io-frames connection and set it as a context attribute
    client = v3f.Client(address=os.getenv('IGZ_V3F'),
                        user=os.getenv('IGZ_USER'),
                        password=os.getenv('IGZ_PWD'),
                        container=os.getenv('CONTAINER'))
    setattr(context, 'client', client)

    connection_string = f"mysql://{user}:{password}@{host}:{port}/{database}"
    engine = create_engine(connection_string,
                           encoding='utf8',
                           convert_unicode=True,
                           isolation_level='READ_COMMITTED')
    session = sessionmaker()
    session.configure(bind=engine)
    dbconn = session()
    setattr(context, 'dbconn', dbconn)
Example #21
                    $('#browsed').attr("src", window.URL.createObjectURL(xhr.response))
                }};
                xhr.send();
            }}
        }});

    $('#button').click( function () {{
        table.row('.selected').remove().draw(false);
    }});
}}); 

</script>"""

img_tag = "<img src = '' alt = 'Please select image to show' id = 'browsed' height = 750 width = 750 align = 'middle'> </img>"

client = v3f.Client("framesd:8081", container="users")

access_key = os.environ['V3IO_ACCESS_KEY']
web_api_prefix = os.environ['WEB_API_PREFIX']


def load_images(data_path):
    return [f for f in paths.list_images(data_path) if '.ipynb' not in f]


def load_enc_df():
    return client.read(backend="kv",
                       table='iguazio/demos/demos/faces/artifacts/encodings',
                       reset_index=True)

Example #22
def test_read(benchmark, framesd, protocol, backend):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr)
    benchmark(read_benchmark, client)
Example #23
def get_cameras_list():
    client = v3f.Client(address=config['v3io']['frames'],
                        container=config['project']['container'],
                        token=config['v3io']['access_key'])
    df = client.read("kv", config['camera']['list_table'])
    return df
Example #24
def test_write(benchmark, framesd, protocol, backend):
    addr = getattr(framesd, '{}_addr'.format(protocol))
    client = v3f.Client(addr)
    benchmark(write_benchmark, client, wdf)
Example #25
def drift_magnitude(
    context,
    t: pd.DataFrame,
    u: pd.DataFrame,
    label_col=None,
    prediction_col=None,
    discretizers: dict = None,
    n_bins=5,
    stream_name: str = "some_stream",
    results_tsdb_container: str = "bigdata",
    results_tsdb_table: str = "concept_drift/drift_magnitude",
):
    """Drift magnitude metrics
       Computes drift magnitude metrics between base dataset t and dataset u.
       Metrics:
        - TVD (Total Variation Distance)
        - Helinger
        - KL Divergence

    :param context: MLRun context
    :param t: Base dataset for the drift metrics
    :param u: Test dataset for the drift metrics
    :param label_col: Label colum in t and u
    :param prediction_col: Predictions column in t and u
    :param discritizers: Dictionary of dicsritizers for the features if available
                         (Created automatically if not provided)
    :param n_bins: Number of bins to be used for histrogram creation from continuous variables
    :param stream_name: Output stream to push metrics to
    :param results_tsdb_container: TSDB table container to push metrics to
    :param results_tsdb_table: TSDB table to push metrics to
    """

    v3io_client = v3f.Client("framesd:8081", container=results_tsdb_container)
    try:
        v3io_client.create("tsdb", results_tsdb_table, if_exists=1, rate="1/s")
    except Exception:
        v3io_client.create("tsdb",
                           results_tsdb_table,
                           if_exists=1,
                           attrs={"rate": "1/s"})

    df_t = t.as_df()
    df_u = u.as_df()

    drop_columns = []
    if label_col is not None:
        drop_columns.append(label_col)
    if prediction_col is not None:
        drop_columns.append(prediction_col)

    continuous_features = df_t.select_dtypes(["float"])
    if discretizers is None:
        discretizers = {}
        for feature in continuous_features.columns:
            context.logger.info(f"Fitting discretizer for {feature}")
            discretizer = KBinsDiscretizer(n_bins=n_bins,
                                           encode="ordinal",
                                           strategy="uniform")

            discretizer.fit(continuous_features.loc[:, feature].values.reshape(
                -1, 1))
            discretizers[feature] = discretizer
    os.makedirs(context.artifact_path, exist_ok=True)
    discretizers_path = os.path.abspath(
        f"{context.artifact_path}/discritizer.pkl")
    with open(discretizers_path, "wb") as f:
        pickle.dump(discretizers, f)
    context.log_artifact("discritizers", target_path=discretizers_path)
    context.logger.info("Discretizing features")
    for feature, discretizer in discretizers.items():
        df_t[feature] = discretizer.transform(df_t.loc[:,
                                                       feature].values.reshape(
                                                           -1, 1))
        df_u[feature] = discretizer.transform(df_u.loc[:,
                                                       feature].values.reshape(
                                                           -1, 1))
        df_t[feature] = df_t[feature].astype("int")
        df_u[feature] = df_u[feature].astype("int")
    context.log_dataset("t_discrete", df_t, format="parquet")
    context.log_dataset("u_discrete", df_u, format="parquet")

    context.logger.info("Compute prior metrics")

    results = {}
    t_prior, u_prior = to_observations(
        context,
        df_t.drop(drop_columns, axis=1),
        df_u.drop(drop_columns, axis=1),
        "features",
    )
    results["prior_tvd"], results["prior_helinger"], results[
        "prior_kld"] = all_metrics(t_prior, u_prior)

    if prediction_col is not None:
        context.logger.info("Compute prediction metrics")
        t_predictions = pd.DataFrame(df_t.loc[:, prediction_col])
        u_predictions = pd.DataFrame(df_u.loc[:, prediction_col])
        t_class, u_class = to_observations(context, t_predictions,
                                           u_predictions, "prediction")
        (
            results["prediction_shift_tvd"],
            results["prediction_shift_helinger"],
            results["prediction_shift_kld"],
        ) = all_metrics(t_class, u_class)

    if label_col is not None:
        context.logger.info("Compute class metrics")
        t_labels = pd.DataFrame(df_t.loc[:, label_col])
        u_labels = pd.DataFrame(df_u.loc[:, label_col])
        t_class, u_class = to_observations(context, t_labels, u_labels,
                                           "class")
        (
            results["class_shift_tvd"],
            results["class_shift_helinger"],
            results["class_shift_kld"],
        ) = all_metrics(t_class, u_class)

    for key, value in results.items():
        if value == float("inf"):
            context.logger.info(f"value: {value}")
            results[key] = 10
    for key, result in results.items():
        context.log_result(key, round(result, 3))

    results["timestamp"] = pd.to_datetime(str(datetime.datetime.now()))
    context.logger.info(f"Timestamp: {results['timestamp']}")
    results["stream"] = stream_name
    results_df = pd.DataFrame(data=[list(results.values())],
                              columns=list(results.keys()))
    results_df = results_df.set_index(["timestamp", "stream"])
    v3io_client.write("tsdb", results_tsdb_table, dfs=results_df)
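The all_metrics helper used above is not shown in this example. For orientation, a minimal sketch of how the three metrics named in the docstring (TVD, Hellinger, KL divergence) can be computed over two aligned discrete distributions; the function name and signature here are illustrative assumptions, not the example's actual helper:

import numpy as np

def drift_metrics_sketch(p, q, eps=1e-10):
    # p, q: histograms or counts over the same bins (illustrative inputs);
    # normalize them into probability distributions first
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    p = p / p.sum()
    q = q / q.sum()

    # Total Variation Distance: half the L1 distance between the distributions
    tvd = 0.5 * np.abs(p - q).sum()

    # Hellinger distance
    hellinger = np.sqrt(0.5 * ((np.sqrt(p) - np.sqrt(q)) ** 2).sum())

    # KL divergence, with eps guarding against log(0) and division by zero
    kld = (p * np.log((p + eps) / (q + eps))).sum()

    return tvd, hellinger, kld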
Example #26
def encode_images(context):
    params = Params()
    params.set_params_from_context(context)
    context.logger.info(params)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    context.logger.info(f'Running on device: {device}')

    client = v3f.Client(params.frames_url,
                        container=params.container,
                        token=params.token)

    if not os.path.exists(params.artifacts_path):
        os.makedirs(params.artifacts_path)
        context.logger.info("created dir {}".format(params.artifacts_path))

    if not os.path.exists(params.data_path + 'processed'):
        os.makedirs(params.data_path + 'processed')
        context.logger.info("created dir {}".format(params.data_path +
                                                    'processed'))

    if not os.path.exists(params.data_path + 'label_pending'):
        os.makedirs(params.data_path + 'label_pending')
        context.logger.info("created dir {}".format(params.data_path +
                                                    'label_pending'))

    # If no train images exist in the predefined path we will train the model on a small dataset of movie actresses
    if not os.path.exists(params.data_path + 'input'):
        os.makedirs(params.data_path + 'input')
        context.logger.info("created dir {}".format(params.data_path +
                                                    'input'))
        context.logger.info(
            "no input provided, downloading actresses images ...")
        resp = urlopen(
            'https://iguazio-public.s3.amazonaws.com/faces-demo/Actresses.zip')
        zip_ref = zipfile.ZipFile(BytesIO(resp.read()), 'r')
        zip_ref.extractall(params.data_path + 'input')
        zip_ref.close()

    if os.path.exists(params.data_path + 'input/__MACOSX'):
        shutil.rmtree(params.data_path + 'input/__MACOSX')

    idx_file_path = params.artifacts_path + "idx2name.csv"
    context.logger.info("index file path : {}".format(idx_file_path))
    if os.path.exists(idx_file_path):
        context.logger.info(
            "index file path exists : {} reading file".format(idx_file_path))
        idx2name_df = pd.read_csv(idx_file_path, index_col=0)
    else:
        context.logger.info(
            "index file path does not exists : {} ".format(idx_file_path))
        idx2name_df = pd.DataFrame(columns=['value', 'name'])

    # creates a mapping of classes(person's names) to target value
    new_classes_names = [
        f for f in os.listdir(params.data_path + 'input')
        if '.ipynb' not in f and f not in idx2name_df['name'].values
    ]

    initial_len = len(idx2name_df)
    final_len = len(idx2name_df) + len(new_classes_names)
    for i in range(initial_len, final_len):
        idx2name_df.loc[i] = {'value': i, 'name': new_classes_names.pop()}

    name2idx = idx2name_df.set_index('name')['value'].to_dict()
    # log name to index mapping into mlrun context
    context.logger.info("artifact_path {} + local_path  idx2name.csv".format(
        context.artifact_path))
    context.log_artifact(TableArtifact('idx2name', df=idx2name_df),
                         artifact_path=params.artifacts_path,
                         local_path='idx2name.csv')

    # generates a list of paths to labeled images
    imagePaths = [
        f for f in paths.list_images(params.data_path + 'input')
        if '.ipynb' not in f
    ]
    knownEncodings = []
    knownLabels = []
    fileNames = []
    urls = []
    for (i, imagePath) in enumerate(imagePaths):
        print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
        # extracts label (person's name) of the image
        name = imagePath.split(os.path.sep)[-2]

        # prepares to relocate image after extracting features
        file_name = imagePath.split(os.path.sep)[-1]
        new_path = params.data_path + 'processed/' + file_name

        # converts image format to RGB for compatibility with the face_recognition library
        image = cv2.imread(imagePath)
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detects coordinates of faces bounding boxes
        boxes = face_recognition.face_locations(rgb, model='hog')

        # computes embeddings for detected faces
        encodings = face_recognition.face_encodings(rgb, boxes)

        # this code assumes that a person's folder in the dataset does not contain an image with a face other than their own
        for enc in encodings:
            file_name = name + '_' + ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=5))
            knownEncodings.append(enc)
            knownLabels.append([name2idx[name]])
            fileNames.append(file_name)
            urls.append(new_path)

        # move image to processed images directory
        shutil.move(imagePath, new_path)

    # saves computed encodings to avoid repeating computations
    df_x = pd.DataFrame(knownEncodings,
                        columns=['c' + str(i).zfill(3)
                                 for i in range(128)]).reset_index(drop=True)
    df_y = pd.DataFrame(knownLabels, columns=['label']).reset_index(drop=True)
    df_details = pd.DataFrame([['initial training'] * 3] * len(df_x),
                              columns=['imgUrl', 'camera', 'time'])
    df_details['time'] = [datetime.datetime.utcnow()] * len(df_x)
    df_details['imgUrl'] = urls
    data_df = pd.concat([df_x, df_y, df_details], axis=1)
    data_df['fileName'] = fileNames

    client.write(backend='kv',
                 table=params.encodings_path,
                 dfs=data_df,
                 index_cols=['fileName'])

    encoding_path = "encoding"
    # with open('encodings_path.txt', 'w+') as f:
    #     f.write('encodings')
    context.log_artifact('encodings_path', body=encoding_path)
Example #27
def get_cameras_list():
    client = v3f.Client(os.getenv('V3IO_FRAMES'),
                        container=os.getenv('IGZ_CONTAINER'))
    df = client.read('kv', os.getenv('CAMERA_LIST_TBL'))
    return df
Example #28
    return [f for f in paths.list_images(images_path) if '.ipynb' not in f]


@st.cache
def load_enc_df(p_kv_table_path):
    return client.read(backend="kv", table=p_kv_table_path, reset_index=True, filter="label!=-1")


if __name__ == '__main__':
    logger = Logger()
    frames_uri = os.environ.get('FRAMES_URI')
    container = os.getenv('CONTAINER', 'users')
    token = os.getenv('V3IO_ACCESS_KEY')
    kv_table_path = os.getenv('KV_TABLE_PATH')
    logger.info(os.environ.items())
    client = v3f.Client(frames_uri, token=token, container=container)
    base_path = '/User/examples/faces/'
    data_path = base_path + 'data/'
    artifact_path = base_path+'artifacts/'
    classes_path = artifact_path + 'idx2name.csv'
    logger.info("classes_path: {}".format(classes_path))
    classes_df = pd.read_csv(classes_path)
    known_classes = [n.replace('_', ' ') for n in classes_df['name'].values]

    page = st.sidebar.selectbox('Choose option', ['Label Unknown Images', 'View Collected Images'], key=1)
    if page == 'Label Unknown Images':
        label_path = data_path + 'label_pending'
        logger.info("label_path: {}".format(data_path + 'label_pending'))
        images = load_images(label_path)
        st.title('Label Unknown Images')
Example #29
def train(context, processed_data, model_name='model.bst'):
    
    hvd.init()
    
    # torch.device("cuda") does not fail on construction, so check availability explicitly
    if not torch.cuda.is_available():
        context.logger.info("Requested running on cuda but no cuda device is available. Terminating")
        exit(1)
    device = torch.device("cuda")
    
    context.logger.info('Client')
    client = v3f.Client('framesd:8081', container="users")
    with open(processed_data.url, 'r') as f:
        t = f.read()

    context.logger.info('Loading dataset')
    data_df = client.read(backend="kv", table=t, reset_index=False, filter='label != -1')
    X = data_df[['c'+str(i).zfill(3) for i in range(128)]].values
    y = data_df['label'].values

    n_classes = len(set(y))

    X = torch.as_tensor(X, device=device)
    y = torch.tensor(y, device=device).reshape(-1, 1)
    
    input_dim = 128
    hidden_dim = 64
    output_dim = n_classes

    context.logger.info('Preparing model architecture')
    spec = importlib.util.spec_from_file_location('models', MODELS_PATH)
    models = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(models)

    model = models.FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)
    model.to(device)
    model = model.double()
    
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.05
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    dataset = data.TensorDataset(X, y)
    train_loader = data.DataLoader(dataset)

    context.logger.info('preparing for horovod distributed training')
    torch.cuda.set_device(hvd.local_rank())
    optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
    train_sampler = data.distributed.DistributedSampler(dataset, num_replicas=hvd.size(), rank=hvd.rank())
    train_loader = data.DataLoader(dataset, sampler=train_sampler)
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    
    context.logger.info('Starting training process')
    for epoch in range(20):
        for features, target in train_loader:
            optimizer.zero_grad()
            out = model(features)
            loss = criterion(out, target[0])
            loss.backward()
            optimizer.step()

    if hvd.rank() == 0:
        context.logger.info('Save model')
        with open(model_name, 'wb') as f:
            dump(model._modules, f)
        context.log_artifact('model', src_path=model_name, target_path=model_name, labels={'framework': 'Pytorch-FeedForwardNN'})
        os.remove(model_name)
Example #30
# Copyright 2018 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Code used by test_pip_docker

from argparse import ArgumentParser

import v3io_frames as v3f

parser = ArgumentParser()
parser.add_argument('--grpc-port', default='8081')
parser.add_argument('--http-port', default='8080')
args = parser.parse_args()

client = v3f.Client('localhost:{}'.format(args.grpc_port))
df = client.read('csv', table='weather.csv')
assert len(df) > 0, 'empty df'