import numpy as np

# read_signatures, enumerate_paths, train_test_split, split_by,
# cosine_similarity and evaluate are project-local helpers assumed in scope.
def main(sigs_path, submission_path, train_to_test_ratio=0.5):
    # Read the imagenet signatures from file
    paths, signatures = read_signatures(sigs_path)
    # Enumerate the frame paths based on person and video
    person_ids, video_ids = enumerate_paths(paths)
    # For each person, split their set of videos into train and test
    train_indices, test_indices = train_test_split(person_ids, video_ids,
                                                   train_to_test_ratio)

    # Solution
    # Find the mean signature for each person based on the training set
    train_sigs = split_by(signatures[train_indices], person_ids[train_indices])
    train_sigs = np.vstack([np.mean(ts, axis=0) for ts in train_sigs])
    # Find the mean signature for each test video
    test_sigs = split_by(signatures[test_indices], video_ids[test_indices])
    test_sigs = np.vstack([np.mean(ts, axis=0) for ts in test_sigs])
    # Ground-truth labels: the person id of each test video
    test_labels = np.array([
        pids[0]
        for pids in split_by(person_ids[test_indices], video_ids[test_indices])
    ])
    # Predict classes using cosine similarity
    similarity_matrix = cosine_similarity(test_sigs, train_sigs)
    # Create a submission - a sorted list of predictions, best match on the
    # left; ranking[:, :-6:-1] keeps the five most similar train persons
    ranking = similarity_matrix.argsort(axis=1)
    submission = [line.tolist() for line in ranking[:, :-6:-1]]
    # Compute and display top-1 / top-5 accuracies
    evaluate(submission, test_labels)
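# A minimal sketch of the split_by helper referenced above, under the
# assumption that it groups the rows of `values` by a parallel array of
# keys and returns one sub-array per distinct key (the project's real
# helper may differ):
import numpy as np

def split_by(values, keys):
    """Return one sub-array of `values` per distinct key in `keys`."""
    values = np.asarray(values)
    keys = np.asarray(keys)
    return [values[keys == k] for k in np.unique(keys)]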
import numpy as np

def main(sigs_train, sigs_test):
    # Read the imagenet signatures from file
    paths_train, train_sigs = read_signatures(sigs_train)
    paths_test, test_sigs = read_signatures(sigs_test)

    # Solution
    # Find the mean signature for each person based on the training set;
    # the person id is parsed from the leading path component
    person_ids = np.array([int(p.split('/')[0][7:]) for p in paths_train])
    train_person_sigs = split_by(train_sigs, person_ids)
    train_person_sigs = np.vstack(
        [np.mean(ts, axis=0) for ts in train_person_sigs])
    # Find the mean signature for each test sequence
    seq_ids = np.array([int(p.split('/')[0][4:]) for p in paths_test])
    test_seq_sigs = split_by(test_sigs, seq_ids)
    test_seq_sigs = np.vstack([np.mean(ts, axis=0) for ts in test_seq_sigs])
    # Predict classes using cosine similarity
    similarity_matrix = cosine_similarity(test_seq_sigs, train_person_sigs)
    # Create a submission - a sorted list of predictions, best match on the left
    ranking = similarity_matrix.argsort(axis=1)
    submission = [line.tolist() for line in ranking[:, :-6:-1]]
    # Submit to the server and print the reply (-1 means something is wrong)
    print(submit('naive', submission))
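# cosine_similarity above is assumed to behave like
# sklearn.metrics.pairwise.cosine_similarity(X, Y): a matrix whose (i, j)
# entry is the cosine of the angle between row i of X and row j of Y.
# A minimal numpy equivalent:
import numpy as np

def cosine_similarity(a, b):
    """Pairwise cosine similarity between the rows of `a` and `b`."""
    a = a / np.linalg.norm(a, axis=1, keepdims=True)
    b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return a @ b.T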
# reactor and deferLater come from Twisted; RND is assumed to be a
# random.Random instance, and TestSource / split_by project-local helpers.
from twisted.internet import reactor
from twisted.internet.task import deferLater

def buildEventStream(self):
    lastEvtId = self.request.evsLastId
    evtSource = TestSource(messages=self.factory.messages,
                           **self.request.evsArgs)
    evtSequence = evtSource.visit_from(lastEvtId + 1)
    if lastEvtId > -1:
        self.log.msg("restart streaming from: %d" % lastEvtId)
    else:
        self.log.msg("new eventsource stream...")
    # No-op callable for deferLater; only the delay matters
    restart = lambda: None
    for message in evtSequence:
        # Extract the start of the message for logging
        msgstart = message[:message.find(":", 0, 8) + 12]
        self.log.msg("new event line: %s..." % msgstart)
        # Write each event in 1-3 chunks to exercise client-side reassembly
        for part in split_by(message, RND.randint(1, 3)):
            self.transport.write(part)
        # Pause a random interval before emitting the next event
        yield deferLater(reactor, RND.uniform(0.05, 0.3), restart)
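# In this snippet split_by is used with a different signature than in the
# classification scripts: here it chunks a message into `n` consecutive
# pieces so one event is spread across several transport writes. A minimal
# sketch of that assumed behaviour:
def split_by(message, n):
    """Split `message` into at most `n` roughly equal consecutive chunks."""
    size = -(-len(message) // n)  # ceiling division
    return [message[i:i + size] for i in range(0, len(message), size)]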
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter

def main(sigs_path, images_path, samples_per_person=16):
    # Read the imagenet signatures from file
    paths, signatures = read_signatures(sigs_path)
    # Enumerate the frame paths based on person and video
    person_ids, video_ids = enumerate_paths(paths)
    # Sample "samples_per_person" frame indices from each person
    sampled_indices = [
        idx for pp in split_by(range(len(paths)), person_ids)
        for idx in sorted(np.random.choice(pp, samples_per_person).tolist())
    ]
    sampled_paths = [paths[idx] for idx in sampled_indices]
    sampled_labels = np.mgrid[:len(sampled_indices),
                              :samples_per_person][0].ravel()
    # Get images of the sampled data points, as NCHW arrays
    with Images(images_path) as images:
        sampled_images = [images[path] for path in sampled_paths]
        sampled_images = np.stack(sampled_images).transpose([0, 3, 1, 2])
    # Get L2-normalized signatures of the sampled data points
    sampled_sigs = signatures[sampled_indices]
    sampled_sigs /= np.sqrt(
        np.sum(np.square(sampled_sigs), axis=1, keepdims=True))
    # Write the embedding to the tensorboard projector
    writer = SummaryWriter()
    meta_data = [sp.split('/')[0] for sp in sampled_paths]
    label_img = torch.from_numpy(sampled_images).float() / 255
    writer.add_embedding(torch.from_numpy(sampled_sigs),
                         metadata=meta_data,
                         label_img=label_img)
    print('Visualization ready')
    print('run: \t tensorboard --logdir=runs')
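# Worked example of the label construction above: np.mgrid[:n, :m][0] is an
# n-by-m grid of row indices, so raveling it repeats each row index m times.
# (Note that sampled_labels is built but not used by the projector call.)
import numpy as np
print(np.mgrid[:3, :4][0].ravel().tolist())
# -> [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]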
import numpy as np
from keras.models import load_model

def main(pose_path, submission_path, train_to_val_ratio=0.5):
    # Read the poses from file
    paths_test, test_pose, test_scores = read_pose(pose_path)
    # Keep the first 13 joints and flatten them to 26 (x, y) coordinates
    test_pose = test_pose[:, :13, :]
    test_pose = np.reshape(test_pose, (test_pose.shape[0], 26))
    # Group the frames by test sequence
    seq_ids = np.array([int(p.split('/')[0][4:]) for p in paths_test])
    test_seq_pose = split_by(test_pose, seq_ids)
    print(len(test_seq_pose))
    # Sample a fixed number of frames (with replacement) from each sequence
    sequence_sz = 20
    X_test = np.array([
        seq[np.random.choice(seq.shape[0], sequence_sz)]
        for seq in test_seq_pose
    ])
    # Load the trained model and predict on the test data
    model = load_model('modelPoseRNN.h5')
    preds = model.predict(X_test)
    # Create a submission - a sorted list of predictions, best match on the left
    ranking = preds.argsort(axis=1)
    submission = [line.tolist() for line in ranking[:, :-6:-1]]
    print(submission[:10])
    # Submit to the server
    from evaluate import submit
    submit('rrr', submission)
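# Note on the fixed-length sampling above: np.random.choice draws with
# replacement by default, so sequences shorter than sequence_sz frames are
# up-sampled (frames repeat) and longer ones are down-sampled, yielding
# exactly sequence_sz frames either way.
import numpy as np
frames = np.arange(7)                               # a 7-frame sequence
sample = frames[np.random.choice(frames.shape[0], 20)]
print(sample.shape)                                 # (20,), some frames repeat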
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

def main(pose_train, pose_test):
    # Read the poses from file (here "train" = train + val)
    paths_train, train_pose, train_scores = read_pose(pose_train)
    paths_test, test_pose, test_scores = read_pose(pose_test)

    # Solution
    # Use only the first 13 joints, and keep a frame only if more than half
    # of those joints have a valid score (> 0)
    bad = np.sum(train_scores[:, :13] < 0, 1)
    good = bad < 7
    good_indices = np.where(good)[0]
    good_paths_train = np.array(paths_train)[good_indices]

    # Flatten the kept joints into 26 (x, y) coordinates per frame
    t1 = train_pose[:, :13, :]
    t_pose = np.reshape(t1, (t1.shape[0], 26))
    t_pose = t_pose[good_indices, :]

    # Split each person's videos into train and validation sets
    sequence_sz = 20
    person_ids, video_ids = enumerate_paths(paths_train)
    train_indices, val_indices = train_val_split(person_ids[good_indices],
                                                 video_ids[good_indices], 0.7)
    # train_indices / val_indices refer to the good-filtered arrays, so both
    # id arrays must be filtered the same way before indexing with them
    person_ids = person_ids[good_indices]
    video_ids = video_ids[good_indices]

    # Sample a fixed number of frames (with replacement) from each train video
    train_p = split_by(t_pose[train_indices], video_ids[train_indices])
    X_train = np.array([
        seq[np.random.choice(seq.shape[0], sequence_sz)] for seq in train_p
    ])
    # ... and from each validation video
    val_p = split_by(t_pose[val_indices], video_ids[val_indices])
    X_val = np.array([
        seq[np.random.choice(seq.shape[0], sequence_sz)] for seq in val_p
    ])

    # Ground-truth labels: the person id of each video
    val_labels = np.array([
        pids[0]
        for pids in split_by(person_ids[val_indices], video_ids[val_indices])
    ])
    train_labels = np.array([
        pids[0]
        for pids in split_by(person_ids[train_indices], video_ids[train_indices])
    ])
    # One-hot encode the labels over the 101 persons
    y_train = np.zeros((train_labels.shape[0], 101))
    y_train[np.arange(train_labels.shape[0]), train_labels] = 1
    y_valid = np.zeros((val_labels.shape[0], 101))
    y_valid[np.arange(val_labels.shape[0]), val_labels] = 1

    # --- build the RNN model ---
    model = Sequential()
    poseNum = 26  # input size: 26 pose coordinates per frame
    batchSz = 32
    # Recurrent layer
    hiddenSz = 496
    model.add(
        LSTM(hiddenSz,
             return_sequences=False,
             dropout=0.1,
             recurrent_dropout=0.1,
             input_shape=(sequence_sz, poseNum)))
    # Fully connected layer
    fullySz = 64
    model.add(Dense(fullySz, activation='relu'))
    # Dropout for regularization
    model.add(Dropout(0.5))
    # Output layer - 101 persons
    outSz = 101
    model.add(Dense(outSz, activation='softmax'))
    # Compile the model
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print(model.summary())

    # Callbacks: early stopping, and checkpointing the best model so far
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5),
        ModelCheckpoint('modelPoseRNN.h5',
                        save_best_only=True,
                        save_weights_only=False)
    ]
    # Fit the model
    history = model.fit(X_train,
                        y_train,
                        batch_size=batchSz,
                        epochs=50,
                        callbacks=callbacks,
                        validation_data=(X_val, y_valid))

    # --- evaluate on the validation data ---
    # Reload the best checkpoint and score it on the validation data
    model = load_model('modelPoseRNN.h5')
    model.evaluate(X_val, y_valid)
    preds = model.predict(X_val)
    # Create a submission - a sorted list of predictions, best match on the left
    ranking = preds.argsort(axis=1)
    submission = [line.tolist() for line in ranking[:, :-6:-1]]
    print(submission[:10])
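# train_val_split is a project-local helper; a minimal sketch under the
# assumption that it assigns roughly `ratio` of each person's videos to the
# training set and returns flat frame indices into the arrays it was given
# (the real helper may differ):
import numpy as np

def train_val_split(person_ids, video_ids, ratio):
    """Per-person split of videos into train / validation frame indices."""
    train_idx, val_idx = [], []
    for pid in np.unique(person_ids):
        vids = np.unique(video_ids[person_ids == pid])
        n_train = max(1, int(round(len(vids) * ratio)))
        for v in vids[:n_train]:
            train_idx.extend(np.flatnonzero(video_ids == v))
        for v in vids[n_train:]:
            val_idx.extend(np.flatnonzero(video_ids == v))
    return np.array(train_idx), np.array(val_idx)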