Example #1
def constOptimize(net, base_img, guide_img, objective, iter_n, max_thres,
                  end, factr=factr, pgtol=pgtol, verbose=True):
    proc_base = utils.preprocess(net, base_img)
    proc_guide = utils.preprocess(net, guide_img)
    src = net.blobs['data']
    ch, h, w = proc_base.shape
    src.reshape(1, ch, h, w)
    # references to the input blob and the target layer blob
    src, dst = net.blobs['data'], net.blobs[end]
    src.data[0] = proc_guide
    net.forward(end=end)
    guide_features = dst.data[0].copy()

    up_bnd = proc_base + max_thres
    lw_bnd = proc_base - max_thres
    mean_arr = net.transformer.mean['data']
    if mean_arr.ndim == 1:
        mean_arr = mean_arr.reshape((3, 1, 1))
    up_bnd = np.minimum(up_bnd, 255 - mean_arr)
    lw_bnd = np.maximum(lw_bnd, 0 - mean_arr)
    bound = list(zip(lw_bnd.flatten(), up_bnd.flatten()))
    src.data[0] = proc_base
    x, f, d = cnstOpt(calc_gstep, proc_base.flatten().astype(float),
                      args=(net, guide_features, end, objective, verbose),
                      bounds=bound, maxiter=iter_n, iprint=0, factr=factr,
                      pgtol=pgtol)

    return x.reshape(proc_base.shape), f, d
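Note: cnstOpt is not defined in this snippet; the factr/pgtol/iprint/bounds arguments suggest it wraps SciPy's bounded L-BFGS-B routine. A minimal sketch of that assumed call on a toy quadratic objective, with per-element box bounds built the same way as above:

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def toy_objective(x):
    # L-BFGS-B expects the objective to return (value, gradient)
    return 0.5 * float(np.dot(x, x)), x.copy()

x0 = np.full(4, 3.0)
bounds = list(zip(np.full(4, 1.0), np.full(4, 5.0)))  # (lower, upper) per element
x, f, d = fmin_l_bfgs_b(toy_objective, x0, bounds=bounds,
                        maxiter=50, iprint=0, factr=1e7, pgtol=1e-8)
print(x, f, d['warnflag'])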
Example #2
def generate(sentence):
    with open('train.txt') as fin:
        train = fin.read()
        train += ' ' + preprocess(sentence)
        morse_codes = translateToMorseCode(train)
        huffman_tree = HuffmanTree()
        huffman_tree.train(morse_codes)
        print huffman_tree.translate(preprocess(sentence))
Example #3
def predict(clf, file_path, scaler=None):
    data = preprocess(file_path)
    X = extract_all_features(data, 44100)
    X = np.asmatrix(X)
    if scaler:
        X = scaler.transform(X)
    res = clf.predict(X[:,imporved_features()])[0]
    return res
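Note: clf and scaler are built elsewhere; a minimal sketch (scikit-learn assumed, with a hypothetical feature matrix) of how such a pair might be fit before calling predict:

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X_train = np.random.rand(100, 20)            # hypothetical extracted features
y_train = np.random.randint(0, 2, size=100)  # hypothetical labels

scaler = StandardScaler().fit(X_train)       # fit the scaling on the training features
clf = SVC().fit(scaler.transform(X_train), y_train)

With those two objects, predict(clf, file_path, scaler=scaler) scales the extracted features the same way they were scaled during training before classifying.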
Example #4
def submission():
    """
    Generate submission file for the trained models.
    """
    logging.info('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    logging.info('Loading models weights...')
    model_systole.load_weights('../models/weights/weights_systole_best.hdf5')
    model_diastole.load_weights('../models/weights/weights_diastole_best.hdf5')

    # load val losses to use as sigmas for CDF
    with open('./logs/val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    logging.info('Loading validation data...')
    X, ids = load_validation_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    logging.info('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    logging.info('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    logging.info('Writing submission to file...')
    fi = csv.reader(open('../input/sample_submission_validate.csv'))
    f = open('../submissions/submission_17.csv', 'w')
    fo = csv.writer(f, lineterminator='\n')
    fo.writerow(next(fi))
    for line in fi:
        idx = line[0]
        key, target = idx.split('_')
        key = int(key)
        out = [idx]
        if key in sub_systole:
            if target == 'Diastole':
                out.extend(list(sub_diastole[key][0]))
            else:
                out.extend(list(sub_systole[key][0]))
        else:
            logging.info('Miss {0}'.format(idx))
        fo.writerow(out)
    f.close()

    logging.info('Done.')
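Note: real_to_cdf is imported from elsewhere; a common construction (an assumption here, not necessarily this project's helper) turns each scalar volume prediction into a 600-bin cumulative distribution, smoothed by a Gaussian CDF whose width is the validation loss:

import numpy as np
from scipy.stats import norm

def real_to_cdf(y, sigma=1e-10):
    # One row per prediction, 600 volume bins (0..599 mL); sigma acts as the uncertainty
    cdf = np.zeros((len(y), 600))
    for i in range(len(y)):
        cdf[i] = norm.cdf(np.arange(600), loc=y[i], scale=sigma)
    return cdf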
Example #5
File: parse.py Project: mgree/tmpl
def tokenize(text):
    replacements = [("---", " "), ("--", " "), ("-", "")]  # trying to capture multi-word keywords

    for (src, tgt) in replacements:
        text = text.replace(src, tgt)

    words = utils.preprocess(text)

    return filter(lambda w: w not in stops, words)
Example #6
def trainAPI():
    global vw, sequenceLabeler
    model = request.args.get("model", "tagger1.bin")
    N = request.args.get("iter", 10)
    try:
        data = request.args.get("data")
        data = data.strip()
        print "Training:", data
        sequenceLabeler.learn(preprocess([data]))
        return "model trained"
    except Exception:
        return "'data' field not present OR training error!!!"
Example #7
def submission():
    """
    Generate submission file for the trained models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    model_diastole.load_weights('weights_diastole_best.hdf5')

    print('Loading validation data...')
    X, ids = load_validation_data()

    print('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = pred_systole.cumsum(axis=-1) 
    cdf_pred_diastole = pred_diastole.cumsum(axis=-1)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    fi = csv.reader(open('/data/heart/sample_submission_test.csv'))
    f = open('submission.csv', 'w')
    fo = csv.writer(f, lineterminator='\n')
    fo.writerow(next(fi))
    for line in fi:
        idx = line[0]
        key, target = idx.split('_')
        key = int(key)
        out = [idx]
        if key in sub_systole:
            if target == 'Diastole':
                out.extend(list(sub_diastole[key][0]))
            else:
                out.extend(list(sub_systole[key][0]))
        else:
            print('Miss {0}'.format(idx))
        fo.writerow(out)
    f.close()

    print('Done.')
Example #8
def main():
    vw = []
    sl = []
    while True:
        inp = raw_input("> ")

        inp = inp.strip()
        words = inp.split()

        cmd = words[0]
        if cmd == "/save":
            for temp in vw:
                temp.finish()
            sys.exit(1)
        if cmd == "/train":
            data = " ".join(words[1:]).strip()
            for i in range(10):
                for temp in sl:
                    temp.learn(preprocess([data]))
        elif cmd == "/query":
            data = " ".join(words[1:]).strip()
            output = set()
            for s in sl:
                output.add(postprocess(query(s, data)))
            for out in output:
                print "\t", out
        elif cmd == "/start":
            data = " ".join(words[1:]).strip()
            if os.path.isfile(data + ".1") and os.path.isfile(data + ".2") and os.path.isfile(
                            data + ".3") and os.path.isfile(data + ".4"):
                vw = [
                    pyvw.vw("--quiet -i " + data + ".1 -f "+data + ".1"),
                    pyvw.vw("--quiet -i " + data + ".2 -f "+data + ".2"),
                    pyvw.vw("--quiet -i " + data + ".3 -f "+data + ".3"),
                    pyvw.vw("--quiet -i " + data + ".4 -f "+data + ".4")
                ]
            else:
                vw = [
                    pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".1"),
                    pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".2"),
                    pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".3"),
                    pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".4")
                ]
            sl = [
                vw[0].init_search_task(SequenceLabeler),
                vw[1].init_search_task(SequenceLabeler2),
                vw[2].init_search_task(SequenceLabeler3),
                vw[3].init_search_task(SequenceLabeler4)
            ]
Example #9
def create_bb_pip(tfr, nepoch, sbatch, mean, shuffle=True):
    tf_mean = tf.constant(mean, dtype=tf.float32)
    tf_mean = tf.reshape(tf_mean, [1, 1, 1, 3])

    fqueue = tf.train.string_input_producer([tfr], num_epochs=nepoch * 10)
    image, idx, bbx = read_single_image(fqueue, 64)

    data = tf.train.batch([image, idx, bbx],
                          batch_size=sbatch,
                          num_threads=1,
                          capacity=sbatch * 3)

    # preprocess input images
    data[0] = preprocess(data[0], tf_mean)
    return data
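Note: the body of preprocess is not shown; a minimal sketch, assuming it only casts the batch to float32 and subtracts the per-channel mean passed in as tf_mean:

import tensorflow as tf

def preprocess(images, tf_mean):
    # Assumed behaviour: uint8 NHWC batch -> float32, centred on the dataset mean
    images = tf.cast(images, tf.float32)
    return images - tf_mean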
Example #10
def predict(model, img_path, device):
    from utils import preprocess, transform

    model.eval()
    with torch.no_grad():
        in_shape = np.asarray(cv2.imread(img_path)).shape
        img = preprocess(img_path)
        fin_shape = np.asarray(img).shape
        img = transform(img)
        img = Variable(img).to(device)
        img = img.view(1, 1, fin_shape[0], fin_shape[1])
        output = model(img)
        img = (255 * output.cpu().clone().detach().numpy()).squeeze()
        img = cv2.resize(img, (in_shape[1], in_shape[0]), interpolation=cv2.INTER_AREA)
        return img
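Note: utils.transform is not shown; a minimal sketch, assuming it converts the preprocessed grayscale image into a float tensor scaled to [0, 1] so that the subsequent view(1, 1, H, W) call works:

import numpy as np
import torch

def transform(img):
    # Assumed behaviour: HxW grayscale image -> float32 tensor in [0, 1]
    return torch.from_numpy(np.asarray(img, dtype=np.float32) / 255.0)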
Example #11
 def extract_feature(self, images, batch_size, preprocess=False, config=None, is_training=False):
     num_images = images.shape[0] if type(images)==np.ndarray else len(images)
     num_features = self.outputs.shape[1]
     result = np.ndarray((num_images, num_features), dtype=np.float32)
     for start_idx in range(0, num_images, batch_size):
         end_idx = min(num_images, start_idx + batch_size)
         inputs = images[start_idx:end_idx]
         if preprocess:
             assert config is not None
             inputs = utils.preprocess(inputs, config, is_training)
         feed_dict = {self.inputs: inputs,
                     self.phase_train_placeholder: False,
                     self.keep_prob_placeholder: 1.0}
         result[start_idx:end_idx] = self.sess.run(self.outputs, feed_dict=feed_dict)
     return result
Example #12
 def embed(self, X0, X1, X2, X3, X4):
     X0 = preprocess(X0)
     X1 = preprocess(X1)
     X2 = preprocess(X2)
     X3 = preprocess(X3)
     X4 = preprocess(X4)
     X0_latent, X1_latent, X2_latent, X3_latent, X4_latent = self.sess.run(
         [
             self.c0_test, self.c1_test, self.c2_test, self.c3_test,
             self.c4_test
         ],
         feed_dict={
             self.x0: X0,
             self.x1: X1,
             self.x2: X2,
             self.x3: X3,
             self.x4: X4
         })
     return (X0_latent[:, 0, :], X1_latent[:, 0, :], X2_latent[:, 0, :],
             X3_latent[:, 0, :], X4_latent[:, 0, :])
Example #13
    def decode(self, words, lower=False):
        """ Return the words with tags of the given words.

        args:
            - words (list): Input words.
            - lower (bool, optional): If True, all uppercase characters in the \
                            words are converted to lowercase.
        return:
            - object : The object of the words with tags.
        """
        if not isinstance(words, list):
            raise AssertionError("Please input a list of words.")
        words = [utils.preprocess(w) for w in words]
        postags = self._postagging(words, lower)
        return postags
Example #14
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        original_image = Image.open(BytesIO(base64.b64decode(data["image"])))
        try:
            image = np.asarray(original_image)  # from PIL image to numpy array
            image = utils.preprocess(image) # apply the preprocessing
            image = np.array([image])       # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            #steering_angle_list.append(steering_angle)
            #i_list.append(i+1)
            #df = DataFrame({'time_values': i_list, 'Steering angle': steering_angle_list})
            #df.to_excel('steering_angle_data.xlsx', sheet_name='sheet1', index=False)
            file_object.write(str(steering_angle))
            file_object.write('\n')
            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            file_object.close()
            print(e)

        # save frame
        if args.image_folder != '':
            #df = DataFrame({'time_values': i_list, 'Steering angle': steering_angle_list})
            #df.to_excel('steering_angle_data.xlsx', sheet_name='sheet1', index=False)
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            original_image.save('{}.jpg'.format(image_filename))
    else:
        sio.emit('manual', data={}, skip_sid=True)
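Note: the throttle heuristic used by the telemetry handlers in these examples is self-contained; a small sketch isolating it (the MIN_SPEED and MAX_SPEED values are illustrative):

MIN_SPEED, MAX_SPEED = 10.0, 25.0

def throttle_for(steering_angle, speed, speed_limit):
    # Drop the limit when driving too fast, otherwise allow the maximum again
    speed_limit = MIN_SPEED if speed > speed_limit else MAX_SPEED
    # Penalise sharp steering and speed relative to the current limit
    throttle = 1.0 - steering_angle ** 2 - (speed / speed_limit) ** 2
    return throttle, speed_limit

throttle, limit = throttle_for(steering_angle=0.1, speed=12.0, speed_limit=MAX_SPEED)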
Example #15
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])

        # The current image from the center camera of the car
        original_image = Image.open(BytesIO(base64.b64decode(data["image"])))
        try:
            image = np.asarray(original_image)  # from PIL image to numpy array
            image = utils.preprocess(image)  # apply the preprocessing
            image = transformations(image)
            image = torch.Tensor(image)
            #image = np.array([image])       # the model expects 4D array

            image = image.view(1, 3, 75, 320)
            image = Variable(image)

            # predict the steering angle for the image
            steering_angle = model(image).view(-1).data.numpy()[0]

            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2
            #throttle = controller.update(float(speed)) - 0.1
            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print("Exception")
            print(e)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            original_image.save('{}.jpg'.format(image_filename))
    else:

        sio.emit('manual', data={}, skip_sid=True)
Example #16
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))
        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))

        try:
            image = np.asarray(image)       # from PIL image to numpy array
            image_copy = np.copy(image)
            image_copy = autoenconder_model.normalize_and_reshape(image_copy)
            loss = anomaly_detection.test_on_batch(image_copy, image_copy)

            image = utils.preprocess(image) # apply the preprocessing
            image = np.array([image])       # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            if loss > 0.035:
                print('{} {} {} {} WARNING'.format(steering_angle, throttle, speed, loss))
            else:
                print('{} {} {} {} OK'.format(steering_angle, throttle, speed, loss))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)

    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
Example #17
def classify(v):
    #######   training part    ###############
    samples = np.loadtxt('generalsamples.data', np.float32)
    responses = np.loadtxt('generalresponses.data', np.float32)
    responses = responses.reshape((responses.size, 1))

    model = cv2.ml.KNearest_create()
    model.train(samples, cv2.ml.ROW_SAMPLE, responses)

    ############################# testing part  #########################

    cap = cv2.VideoCapture(1)
    labels = ['S', 'U', 'H']

    while (True):
        ret, im = cap.read()
        #print(str('train' + str(i)+'.jpg'))
        #im = cv2.imread(str('train/' + str(i)+'.jpg'))

        img = utils.preprocess(im)
        im2 = img.copy()

        image, contours, hierarchy = cv2.findContours(im2, cv2.RETR_LIST,
                                                      cv2.CHAIN_APPROX_SIMPLE)

        ############################# classification #########################
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if utils.check_rectangle(area):
                [x, y, w, h] = cv2.boundingRect(cnt)
                if utils.check_ratio(x, y, w, h):
                    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    retval, results, neigh_resp, dists = model.findNearest(
                        utils.roismall(img, x, y, w, h), k=3)
                    string = labels[int(results[0][0])]
                    cv2.putText(im,
                                string, (x + 3, y + h + 3),
                                0,
                                2, (255, 0, 0),
                                thickness=3)

        cv2.imshow('im', im)
        #cv2.imshow('processed',img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
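Note: utils.roismall is not defined here; findNearest needs a float32 row with the same number of columns as the training samples. A plausible sketch, assuming the classic OpenCV digit-recognition layout of 10x10 crops flattened to 100 features:

import cv2
import numpy as np

def roismall(img, x, y, w, h):
    # Hypothetical helper: crop the bounding box, resize to 10x10 and flatten
    # to a (1, 100) float32 row, matching the shape KNearest was trained on
    roi = img[y:y + h, x:x + w]
    roi = cv2.resize(roi, (10, 10))
    return roi.reshape((1, 100)).astype(np.float32)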
Example #18
def telemetry(sid, data):
    if data:

        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])

        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))
        img = np.asarray(image)
        img = utils.preprocess(img)
        try:
            # from PIL image to numpy array
            #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # predict the steering angle for the image
            img = Variable(torch.cuda.FloatTensor([img], device=device)).permute(0,3,1,2)

            steering_angle_throttle = model(img)
            #steering_angle = steering_angle_throttle[0].item()
            #throttle = steering_angle_throttle[1].item()
            steering_angle = steering_angle_throttle.item()
            #print(f'steering angle {steering_angle}')
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            print('steering_angle: {} throttle: {} speed: {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:

        sio.emit('manual', data={}, skip_sid=True)
Example #19
def main(args):
    # I/O
    config_file = args.config_file
    config = utils.import_file(config_file, 'config')

    #trainset = utils.Dataset(config.train_dataset_path)
    testset = utils.Dataset(config.test_dataset_path)

    network = BaseNetwork()
    network.initialize(config, 0 ) #trainset.num_classes


    # Initalization for running
    log_dir = utils.create_log_dir(config, config_file)
    summary_writer = tf.summary.FileWriter(log_dir, network.graph)
    if config.restore_model is not None:
        network.restore_model(config.restore_model, config.restore_scopes)

    # Set up LFW test protocol and load images
    print('Loading images...')
    lfwtest = LFWTest(testset.images)
    lfwtest.init_standard_proto(config.lfw_pairs_file)
    lfwtest.images = utils.preprocess(lfwtest.image_paths, config, is_training=False)


    #trainset.start_batch_queue(config, True)


    #
    # Main Loop
    #
    print('\nStart Training\nname: %s\n# epochs: %d\nepoch_size: %d\nbatch_size: %d\n'\
        % (config.name, config.num_epochs, config.epoch_size, config.batch_size))
    global_step = 0

    # Testing on LFW
    print('Testing on Neetis LFW protocol...')
    embeddings = network.extract_feature(lfwtest.images, config.batch_size)
    print(type(embeddings))

    accuracy_embeddings, threshold_embeddings = lfwtest.test_standard_proto(embeddings)
    print('Embeddings Accuracy: %2.4f Threshold %2.3f' % (accuracy_embeddings, threshold_embeddings))

    with open(os.path.join(log_dir,'lfw_result.txt'),'at') as f:
        f.write('%d\t%.5f\n' % (global_step,accuracy_embeddings))
    summary = tf.Summary()
    summary.value.add(tag='lfw/accuracy', simple_value=accuracy_embeddings)
    summary_writer.add_summary(summary, global_step)
Example #20
def read_data_from_file(data_path):
    maybe_download()
    with open(data_path) as f:
        text = f.read()

    ###########################################################
    # ------------------- Preprocessing -----------------------
    # 1. Tokenize punctuations e.g. period -> <PERIOD>
    # 2. Remove words that show up five times or fewer
    words = utils.preprocess(text)

    # Hmm, let's take a look at the processed data
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Create two dictionaries to convert words to integers
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Convert words into integers
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc don't provide much
    # information. So we might want to remove some of them.
    # This results in faster and better result.
    # The probability that a word is discarded is
    # P(discard w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = 1e-5  # FLAGS.drop_word_threshold

    freqs = {word: count/total_count for word,
             count in each_word_count.items()}
    probs = {word: 1 - np.sqrt(threshold/freqs[word])
             for word in each_word_count}

    train_words = [word for word in int_words if random.random() <
                   (1 - probs[word])]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    # Subsampling makes it worse for eliminating contextual info
    # return train_words, int_to_vocab, vocab_to_int, n_vocab
    return int_words, int_to_vocab, vocab_to_int, n_vocab
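Note: utils.create_lookup_tables is not shown; a minimal sketch, assuming it ranks words by frequency and assigns integer ids in that order:

from collections import Counter

def create_lookup_tables(words):
    # Assumed convention: the most frequent word gets id 0, the next id 1, ...
    word_counts = Counter(words)
    sorted_vocab = sorted(word_counts, key=word_counts.get, reverse=True)
    int_to_vocab = {i: word for i, word in enumerate(sorted_vocab)}
    vocab_to_int = {word: i for i, word in int_to_vocab.items()}
    return vocab_to_int, int_to_vocab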
Example #21
def get_tweets():
    global batch_start_time
    processed_tweet = []
    try:
        for line in api.GetStreamSample():
            if 'text' in line and line['lang'] == u'en':
                text = line['text'].encode('utf-8').replace('\n', ' ')
                p_t = preprocess(text)  # process tweets
                if p_t:
                    processed_tweet += p_t,
            if time.time() - batch_start_time >= tw * 60:  # time is over for this batch
                return processed_tweet
        return processed_tweet  # server-side interruption
    except:
        pass
Example #22
def preprocess_clustering(text: str):
    text = preprocess(text)
    tokens = text.split(' ')
    doc = []
    for token in tokens:
        if token in string.punctuation:
            continue
        if token.isnumeric():
            continue
        if len(token) < 2:
            continue

        # lemmatize the words
        token = LEMMATIZER.lemmatize(token)
        doc.append(token)
    return doc
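Note: preprocess and LEMMATIZER are module-level names in this example; a minimal usage sketch, assuming preprocess lower-cases and collapses whitespace and LEMMATIZER is NLTK's WordNetLemmatizer (the wordnet corpus must be downloaded):

import string                      # needed by preprocess_clustering above
from nltk.stem import WordNetLemmatizer

LEMMATIZER = WordNetLemmatizer()

def preprocess(text: str) -> str:
    # Assumed behaviour: lower-case and normalise whitespace
    return ' '.join(text.lower().split())

doc = preprocess_clustering("The 3 cats were sitting on 12 mats .")
# e.g. ['the', 'cat', 'were', 'sitting', 'on', 'mat'], depending on the lemmatizer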
Example #23
  def __getitem__(self, index):
    _img = Image.open(self.images[index]).convert('RGB')
    _target = Image.open(self.masks[index])

    _img, _target = preprocess(_img, _target,
                               flip=True if self.train else False,
                               scale=(0.5, 2.0) if self.train else None,
                               crop=(self.crop_size, self.crop_size) if self.train else (1025, 2049))

    if self.transform is not None:
      _img = self.transform(_img)

    if self.target_transform is not None:
      _target = self.target_transform(_target)
    # print(_img.shape)
    return _img, _target
Example #24
    def __init__(self, base_dir, batch_size, rst, max_size=500,
                normalize=True, preprocessing=True):
        BATCH_FILES = 4
        self.base_dir = base_dir
        self.batch_size = batch_size
        self.rst = rst
        self.normalize = normalize
        self.max_size = max_size
        self.preprocessing = preprocessing
        self.x = self.get_content_images()

        if self.preprocessing:
            self.x = utils.preprocess(self.x)

        if normalize:
            self.x = utils.norm(self.x)
Example #25
def read_phone_data(f_name):
    with open(f_name, 'r', encoding='utf-8') as f_phone:
        phone_data = []
        phone = f_phone.readlines()
        phone.insert(0, '\n')
        for idx, line in tqdm(enumerate(phone)):
            if idx + 1 < len(phone) and phone[idx + 1] == '\n': continue
            if line == '\n':
                i, text = 0, ''
            i += 1
            if i > 3:
                text += line
                if idx + 2 < len(phone) and phone[idx + 2] == '\n':
                    label = int(phone[idx + 1].replace('\n', ''))
                    phone_data.append({label: preprocess(text)})
    return phone_data
Example #26
    def preprocess(self, stop_filter=True, pos_filter=True):
        """Preprocess document text

        This method can filter out basic stopwords listed in the utils file
        and parts of speech.

        :param stop_filter: stopword filter status
        :type stop_filter: bool
        :param pos_filter: parts of speech filter status
        :type pos_filter: bool
        """

        self.text = ". ".join(
            preprocess(self.text,
                       stop_filter=stop_filter,
                       pos_filter=pos_filter))
Example #27
def train_baseline():
    train_df = pd.read_csv(train_df_path, index_col=0)

    sentences_train, dictionary, y_train_encoded = preprocess(
        train_df, processed_train_df_path, encoder)
    y_train = train_df['relation_type'].values
    vectorizer.fit(sentences_train)
    X_train = vectorizer.transform(sentences_train)
    print('training...')
    classifier = RandomForestClassifier(n_estimators=700,
                                        max_depth=60,
                                        n_jobs=-1,
                                        class_weight='balanced')
    classifier.fit(X_train, y_train)
    print('trained')
    return classifier, dictionary
Example #28
def train_baseline():
    train_df = pd.read_csv(train_df_path, index_col=0)

    sentences_train, dictionary, y_train_encoded = preprocess(
        train_df, processed_train_df_path, encoder)
    y_train = train_df['relation_type'].values
    vectorizer.fit(sentences_train)
    X_train = vectorizer.transform(sentences_train)
    print('training...')
    classifier = MLPClassifier(activation='tanh',
                               alpha=0.1,
                               hidden_layer_sizes=(30, 5),
                               learning_rate='constant')
    classifier.fit(X_train, y_train)
    print('trained')
    return classifier, dictionary
Example #29
    def raw_utterance_with_keyword(self, vocab, train=True):
        cache = cacher('dts_ConvAI2.raw_utterance_with_keyword', vocab, train)
        if cache.cached:
            return cache.data

        examples, corpus, check_dict = self.raw_utterance(train)
        field = Field(vocab)

        kwext = KeywordExtractor(field)
        for example in prolog(examples, name=' -extract keywords'):
            kws = kwext.extract(example['uttr'].lst)
            example['kwpos'] = kws['kwpos'] 
            example['keywords'] = kws['keywords'] 

        examples = preprocess(examples, field, log=' -process to_pack cls')
        return cache.cache((examples, field, corpus, check_dict))
Example #30
 def synthesize_x1(self, X1_latent, parents=None):
     if isinstance(X1_latent, int):
         N = X1_latent
         X1_latent = np.random.uniform(size=(N, self.latent_dim[1]))
         X1_noise = np.random.normal(scale=0.5, size=(N, self.noise_dim[1]))
     else:
         N = X1_latent.shape[0]
         X1_noise = np.zeros((N, self.noise_dim[1]))
     if parents is None:
         X0 = self.synthesize_x0(1)[0]
     else:
         X0 = parents[0]
     X0 = preprocess(X0)
     X0 = np.tile(X0, (N,1,1,1))
     X1 = self.sess.run(self.x1_fake, feed_dict={self.c1: X1_latent, self.z1: X1_noise, self.x0_fake: X0})
     return [postprocess(X1), postprocess(X0)]
Example #31
 def cell_transform(xs, indexes=None):
     Fs = []
     xs = [preprocess(x) for x in xs]
     for xmb in tqdm(
             iter_data(xs, size=hps.nbatch), ncols=80, leave=False,
             total=len(xs)//hps.nbatch):
         smb = np.zeros((2, hps.nbatch, hps.nhidden))
         n = len(xmb)
         xmb, mmb = batch_pad(xmb, hps.nbatch, hps.nsteps)
         smb = sess.run(cells, {X: xmb, S: smb, M: mmb})
         smb = smb[:, :n, :]
         if indexes is not None:
             smb = smb[:, :, indexes]
         Fs.append(smb)
     Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
     return Fs
Example #32
def live(state_widget, model, camera, prediction_widget):
    global dataset
    while state_widget.value == 'live':
        image = camera.value
        preprocessed = preprocess(image)
        output = model(preprocessed).detach().cpu().numpy().flatten()
        category_index = dataset.categories.index(category_widget.value)
        x = output[2 * category_index]
        y = output[2 * category_index + 1]

        x = int(camera.width * (x / 2.0 + 0.5))
        y = int(camera.height * (y / 2.0 + 0.5))

        prediction = image.copy()
        prediction = cv2.circle(prediction, (x, y), 8, (255, 0, 0), 3)
        prediction_widget.value = bgr8_to_jpeg(prediction)
Example #33
def read_data_from_file(data_path: str) -> tuple:
    """
    Build the list of training words and the vocabulary size.
    :param data_path: path to the raw text file
    :return: (train_words, int_to_vocab, vocab_to_int, n_vocab)
    """
    maybe_download()
    with open(data_path) as f:
        text = f.read()
    # Replace special punctuation in the text with designated tokens.
    words = utils.preprocess(text)
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))
    # Sort words by frequency (high to low), drop words that occur fewer than 5 times,
    # and build the id2word and word2id dictionaries.
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)
    # Convert the words into a sequence of integer ids using the lookup table.
    int_words = [vocab_to_int[w] for w in words]
    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc don't provide much
    # information. So we might want to remove some of them.
    # This results in faster and better result.
    # The probability that a word is discarded is
    # P(discard w) = 1 - sqrt(threshold / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = FLAGS.drop_word_threshold
    # Word frequencies
    freq_s = {
        word: count / total_count
        for word, count in each_word_count.items()
    }
    prob_s = {
        word: 1 - np.sqrt(threshold / freq_s[word])
        for word in each_word_count
    }

    train_words = [
        word for word in int_words if random.random() < (1 - prob_s[word])
    ]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    return train_words, int_to_vocab, vocab_to_int, n_vocab
Example #34
def detect(sess, model, names, image, path):
    preprocess = eval(args.preprocess)
    _, height, width, _ = image.get_shape().as_list()
    _image = read_image(path)
    image_original = np.array(np.uint8(_image))
    image_height, image_width, _ = image_original.shape
    image_std = preprocess(
        np.array(np.uint8(_image.resize((width, height)))).astype(np.float32))
    feed_dict = {image: np.expand_dims(image_std, 0)}
    tensors = [model.conf, model.xy_min, model.xy_max]
    conf, xy_min, xy_max = sess.run(
        [tf.check_numerics(t, t.op.name) for t in tensors],
        feed_dict=feed_dict)
    boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0],
                                               args.threshold,
                                               args.threshold_iou)
    scale = [image_width / model.cell_width, image_height / model.cell_height]
    fig = plt.figure()
    ax = fig.gca()
    ax.imshow(image_original)
    colors = [
        prop['color'] for _, prop in zip(
            names, itertools.cycle(plt.rcParams['axes.prop_cycle']))
    ]
    cnt = 0
    for _conf, _xy_min, _xy_max in boxes:
        index = np.argmax(_conf)
        if _conf[index] > args.threshold:
            wh = _xy_max - _xy_min
            _xy_min = _xy_min * scale
            _wh = wh * scale
            linewidth = min(_conf[index] * 10, 3)
            ax.add_patch(
                patches.Rectangle(_xy_min,
                                  _wh[0],
                                  _wh[1],
                                  linewidth=linewidth,
                                  edgecolor=colors[index],
                                  facecolor='none'))
            ax.annotate(names[index] + ' (%.1f%%)' % (_conf[index] * 100),
                        _xy_min,
                        color=colors[index])
            cnt += 1
    fig.canvas.set_window_title('%d objects detected' % cnt)
    ax.set_xticks([])
    ax.set_yticks([])
    return fig
Example #35
def create_bb_pip(tfr_pool, nepoch, sbatch, mean, shuffle=True):
    if len(tfr_pool) == 3:
        ebs = [int(sbatch * 0.5), int(sbatch * 0.3), sbatch - int(sbatch * 0.5) - int(sbatch * 0.3)]
    elif len(tfr_pool) == 1:
        ebs = [sbatch]
    else:
        print("Input Format is not recognized")
        return

    data_pool = []

    for ix, tfr in enumerate(tfr_pool):
        cur_ebs = ebs[ix]
        tokens = tfr.split('/')[-1].split('_')
        dim = int(tokens[-1].split('.')[0][1:])
        tf_mean = tf.constant(mean, dtype=tf.float32)
        tf_mean = tf.reshape(tf_mean, [1, 1, 1, 3])

        fqueue = tf.train.string_input_producer([tfr], num_epochs=nepoch)
        image, gt_key, gt_3d, gt_2d, occ = read_one_datum(fqueue, dim)

        if shuffle:
            data = tf.train.shuffle_batch([image, gt_key, gt_3d, gt_2d, occ], batch_size=cur_ebs,
                                          num_threads=12, capacity=sbatch * 6, min_after_dequeue=cur_ebs * 3)
        else:
            data = tf.train.batch([image, gt_key, gt_3d, gt_2d, occ], batch_size=cur_ebs,
                                  num_threads=12, capacity=cur_ebs * 5)

        # preprocess input images

        # print("data0]", data[0])
        data[0] = preprocess(data[0], tf_mean) #
        # data[0] = preprocess_norm(data[0]) #

        if ix == 0:
            for j in range(len(data)):
                data_pool.append([data[j]])
        else:
            for j in range(len(data)):
                data_pool[j].append(data[j])

    combined_data = []
    for dd in data_pool:
        combined_data.append(tf.concat(dd, axis=0))
    # print("sanity check : combined_data", combined_data)

    return combined_data
Example #36
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        ensembling_weight = 0.66
        # The current image from the center camera of the car
        original_image = Image.open(BytesIO(base64.b64decode(data["image"])))
        try:
            image = np.asarray(original_image)  # from PIL image to numpy array
            image = utils.preprocess(image)  # apply the preprocessing
            image = np.array([image])  # the model expects 4D array
            global smoothed_angle
            # predict the steering angle for the image
            steering_angle1 = float(model.predict(image, batch_size=1))
            # smoothed_angle += 0.2 * pow(abs((steering_angle - smoothed_angle)), 2.0 / 3.0) * (steering_angle - smoothed_angle) / abs(steering_angle - smoothed_angle)
            # lower the throttle as the speed increases

            steering_angle2 = float(model2.predict(image, batch_size=1))
            steering_angle = float(
                (ensembling_weight * steering_angle1 + steering_angle2 *
                 (1.0 - ensembling_weight)))
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            original_image.save('{}.jpg'.format(image_filename))
    else:

        sio.emit('manual', data={}, skip_sid=True)
Example #37
    def __getitem__(self, idx):
        files = self.sample_files[idx]
        pre_img = cv2.imread(files['pre_img'])
        post_img = cv2.imread(files['post_img'])
        if self.rgb:
            pre_img = cv2.cvtColor(pre_img, cv2.COLOR_BGR2RGB)
            post_img = cv2.cvtColor(post_img, cv2.COLOR_BGR2RGB)

        if self.mode in [
                'train', 'oodtrain', 'guptatrain', "ood2train", "ood3train",
                "singletrain"
        ]:
            sample = self.get_sample_with_mask(files, pre_img, post_img)
            sample['image_id'] = files['img_id']
            if self.preprocessing is not None:
                transformed = preprocess(sample['pre_img'],
                                         sample['post_img'],
                                         sample['mask_img'],
                                         flip=self.preprocessing['flip'],
                                         scale=self.preprocessing['scale'],
                                         crop=self.preprocessing['crop'])
                sample['pre_img'] = transformed[0]
                sample['post_img'] = transformed[1]
                sample['mask_img'] = transformed[2]
        elif self.mode in [
                'oodtest', 'oodhold', 'guptatest', 'guptahold', "ood2test",
                "ood2hold", "ood3test", "ood3hold", "singletest", "singlehold"
        ]:
            pre_img = self.data_transforms(pre_img)
            post_img = self.data_transforms(post_img)
            sample = {
                'pre_img': pre_img,
                'post_img': post_img,
                'image_id': files['img_id']
            }
            post_json = json.loads(open(files['post_json']).read())
            buildings = self._get_building_from_json(post_json)
            sample['mask_img'] = self.make_mask_img(**buildings)
        else:
            pre_img = self.data_transforms(pre_img)
            post_img = self.data_transforms(post_img)
            sample = {
                'pre_img': pre_img,
                'post_img': post_img,
                'image_id': files['img_id']
            }
        return sample
Example #38
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        
        # The current image from the center camera of the car
        original_image = Image.open(BytesIO(base64.b64decode(data["image"])))
        try:
            image = np.asarray(original_image)  # from PIL image to numpy array
            image = utils.preprocess(image) # apply the preprocessing
            image = np.array([image])       # the model expects 4D array

            
            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))
            
            
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.          
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            
            
            # Calculate throttle from speed and steering angle
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            original_image.save('{}.jpg'.format(image_filename))
    else:
        
        sio.emit('manual', data={}, skip_sid=True)
Example #39
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))
        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))

        try:
            image = np.asarray(image)       # from PIL image to numpy array
            image = utils.preprocess(image) # apply the preprocessing
            image = np.array([image])       # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))
            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)

    else:
        
        sio.emit('manual', data={}, skip_sid=True)
Example #40
def get_data():
    """
    Load the corpus.
    """
    texts = []
    subfolds = os.listdir("../Sample")
    for subfold in subfolds:
        subdir = "../Sample/{}".format(subfold)
        if os.path.isdir(subdir):
            files = os.listdir(subdir)
            for file in files:
                text = open("{}/{}".format(subdir, file)).read()
                text = utils.preprocess(text)
                text = utils.getWordlist(text)
                texts.append(text)

    # Remove words that appear only once
    frequency = defaultdict(int)
    for text in texts:
        for token in text:
            frequency[token] += 1
    texts = [[token for token in text if frequency[token] > 1] for text in texts]

    return texts
Example #41
def train():
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_model(img_size)
    model_diastole = get_model(img_size)

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # define image generator for random rotations
    datagen = ImageDataGenerator(featurewise_center=False,
                                 featurewise_std_normalization=False,
                                 rotation_range=15)

    nb_iter = 300
    epochs_per_iter = 1
    batch_size = 64
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    if not os.path.exists(STATS):
        os.makedirs(STATS)

    with open(STATS + 'RMSE_CRPS.txt', 'w') as f:
        names = ['train_RMSE_d', 'train_RMSE_s', 'test_RMSE_d', 'test_RMSE_s', 'train_crps', 'test_crps']
        f.write('\t'.join([str(name) for name in names]) + '\n')

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)
        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))
        print('Fitting diastole model...')

        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')


        # save weights so they can be loaded later
        model_systole.save_weights(MODELS + 'weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights(MODELS + 'weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open(MODELS + 'val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        with open(STATS + 'RMSE_CRPS.txt', 'a') as f:
            # train_RMSE_d train_RMSE_s test_RMSE_d test_RMSE_s train_crps test_crps
            rmse_values = [loss_diastole, loss_systole, val_loss_diastole, val_loss_systole]
            crps_values = [crps_train, crps_test]
            f.write('\t'.join([str(val) for val in rmse_values + crps_values]) + '\n')

        print('Saving stats images...')
        write_images(STATS)
        
        if (i != 0) and ((i + 1) % 100 == 0):
            print('Submitting learned model...')
            SUBMISSION_FOLDER = SUBMISSION + preproc_type + "/" + model_name + "/" + get_name() + "_ITERS" + str(i + 1) + "/" 
            if not os.path.exists(SUBMISSION_FOLDER):
                os.makedirs(SUBMISSION_FOLDER)
            copyfile(MODELS + 'weights_systole_best.hdf5', SUBMISSION_FOLDER + 'weights_systole_best.hdf5')
            copyfile(MODELS + 'weights_diastole_best.hdf5', SUBMISSION_FOLDER + 'weights_diastole_best.hdf5')
            copyfile(MODELS + 'val_loss.txt', SUBMISSION_FOLDER + 'val_loss.txt')
            os.system('python submission.py %s %s %s' % (preproc_type, model_name, SUBMISSION_FOLDER))
Example #42
def main(num_epochs=500):
    # Load the dataset
    print 'Loading dataset ...'
    eng_para = pd.read_csv('data/2g_gongcan.csv')
#eng_para = eng_para.loc[:, ['LAC', 'CI', 'Angle', 'Longitude', 'Latitude', 'Power', 'GSM Neighbor Count', 'TD Neighbor Count']]
    tr_feature, tr_label, tr_ids = load_dataset('data/forward_recovered.csv', eng_para, True) 
    te_feature, te_label, te_ids = load_dataset('data/backward_recovered.csv', eng_para, False)
    ## NOTE: may need to ensure the train and test data have the same feature shape
    train_size, n_con = tr_feature.shape
    test_size, n_con = te_feature.shape
    n_dis = len(tr_ids) 

    # Create neural network model
    print 'Preprocessing data ...'
    # Standardize continous input
    tr_feature, te_feature = preprocess(tr_feature, te_feature)
    tr_input = {'con_input' : tr_feature}
    te_input = {'con_input' : te_feature}
    # Prepare embedding input
    dis_dims, vocab_sizes = [], []
    for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids): # make sure tr_ids contain several different discrete features
        vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_) 
        tr_id_idx_, te_id_idx_ = [], []
        dis_dim = len(tr_ids_)
        for i in range(dis_dim):
            tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i])
            te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i])
        tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose()
        te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose()

        ## Add discrete feature to dict
        tr_input['emb_input%d' % ii] = tr_ids
        te_input['emb_input%d' % ii] = te_ids

        dis_dims.append(dis_dim)
        vocab_sizes.append(vocab_size)

    print 'Building model and compiling functions ...'
    # Define network structure
    l_output = build_mlp(n_con, n_dis, dis_dims, vocab_sizes)
    
    # Set batch size
    bi = BatchIterator(batch_size=10)

    # Build network
    network = NeuralNet(l_output,
                 regression=True,
                 update_learning_rate=1e-5,
                 update=nesterov_momentum,
                 update_momentum=0.9,
                 train_split=TrainSplit(eval_size=0.05),
                 verbose=1,
                 batch_iterator_train=bi,
                 objective_loss_function=lasagne.objectives.squared_error,
                 max_epochs=5000)

    pickle_name = 'MLP-0.10.pickle'

    mul_val = 10000.
    lon_offset = np.mean(tr_label[:, 0])
    lon_std = np.mean(tr_label[:, 0])
    lat_offset = np.mean(tr_label[:, 1])
    lat_std = np.mean(tr_label[:, 1])
    ######## Change Target
    tr_label[:, 0] = (tr_label[:, 0] - lon_offset) * mul_val 
    tr_label[:, 1] = (tr_label[:, 1] - lat_offset) * mul_val 
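    # quick sanity check of the round trip (the longitude value is illustrative only):
    #   lon = 121.4737; scaled = (lon - lon_offset) * mul_val
    #   lon == scaled / mul_val + lon_offset   # the inverse applied to te_pred further down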
    tr_label = tr_label.astype(np.float32)
    print tr_label

    is_train = True
    if is_train:
        network.fit(tr_input, tr_label)
        # Dump Network
        with open('model/'+pickle_name, 'wb') as f:
            pickle.dump(network, f, -1)
    else:
        # Load Network
        with open('model/'+pickle_name, 'rb') as f:
            network = pickle.load(f)

    # Make prediction
    te_pred = network.predict(te_input)

    te_pred[:, 0] = te_pred[:, 0] / mul_val + lon_offset
    te_pred[:, 1] = te_pred[:, 1] / mul_val + lat_offset
    with open('pred.csv', 'w') as f_out:
        for pred_pt, true_pt in zip(te_pred, te_label):
            f_out.write('%f,%f,%f,%f\n' % (pred_pt[0], pred_pt[1], true_pt[0], true_pt[1]))

    # Generate report
    gen_report(te_label, te_pred, pickle_name)
Ejemplo n.º 43
0
def mlp(tr_data, te_data, eng_para, col_name, grid_size, \
        optimizer, batch_size, hidden_size, mlp_feature, \
        nb_epoch, prediction, model_name, is_train):
    # Load the dataset
    print 'Loading dataset ...'
    tr_feature, tr_label, tr_ids = mlp_feature(tr_data, eng_para, True, col_name)
    te_feature, te_label, te_ids = mlp_feature(te_data, eng_para, True, col_name)
    rg = RoadGrid(np.vstack((tr_label, te_label)), grid_size)
    tr_label = rg.transform(tr_label)
    # te_label = rg.transform(te_label)

    ## !!! may need to ensure the train data have the same shape as the test data
    train_size, n_con = tr_feature.shape
    test_size, n_con = te_feature.shape
    n_dis = len(tr_ids)

    # Create neural network model
    print 'Preprocessing data ...'
    # Standardize continuous input
    # tr_feature, te_feature = preprocess(tr_feature, te_feature)
    tr_feature, te_feature = preprocess(tr_feature, te_feature)
    # te_feature = preprocess(te_feature)
    tr_input = {'con_input' : tr_feature, 'output' : tr_label}
    te_input = {'con_input' : te_feature}
    # Prepare embedding input
    dis_dims, vocab_sizes = [], []
    for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids): # make sure tr_ids contain several different discrete features
        vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_)
        tr_id_idx_, te_id_idx_ = [], []
        dis_dim = len(tr_ids_)
        for i in range(dis_dim):
            tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i])
            te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i])
        tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose()
        te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose()

        ## Add discrete feature to dict
        tr_input['emb_input%d' % ii] = tr_ids
        te_input['emb_input%d' % ii] = te_ids

        dis_dims.append(dis_dim)
        vocab_sizes.append(vocab_size)

    print 'Building model and compiling functions ...'
    # Define network structure
    grid_info = rg.grid_center
    network = build_mlp(n_con, n_dis, dis_dims, vocab_sizes, len(grid_info), hidden_size)

#network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=SGD(lr=1e-2, momentum=0.9, nesterov=True))
    network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=optimizer)

    # Build network
    # pickle_name = 'MLP-softmax-0.4.pickle'
    pickle_name = model_name

    if is_train:
        history = network.fit(tr_input, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)
        # Dump Network
        with open('model/'+pickle_name, 'wb') as f:
            pickle.dump(network, f, -1)
    else:
        # Load Network
        with open('model/'+pickle_name, 'rb') as f:
            network = pickle.load(f)

    # Make prediction
    ## 1. weighted
    if prediction == 'weighted':
        te_pred = np.asarray(network.predict(te_input)['output'])
        te_pred = te_pred.dot(grid_info)
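        # i.e. the expected location: each grid center weighted by its predicted probability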
    # Generate report
    # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Weighted'])
    elif prediction == 'argmax':
    ## 2. argmax
        te_pred = np.asarray(network.predict(te_input)['output'])
        te_pred = np.argmax(te_pred, axis=1)
        te_pred = [grid_info[idx] for idx in te_pred]
    # Generate report
    # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Argmax'])
    else:
        te_pred = None
    return te_pred
Ejemplo n.º 44
0
def train():
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter,
                                           batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))
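            # e.g. a true volume of 123.4 ml becomes a 600-long step vector: 0 up to index 123,
            # 1 from index 124 on; the CRPS below is then (in this formulation) the mean squared
            # difference between the predicted and true CDF vectors over all 600 thresholds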

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
Ejemplo n.º 45
0
    def run(self, load_pipeline=False, train_more=False):

        # Load/pretrain the net
        if load_pipeline:

            # Load the processing pipeline
            nn_input_shape = self.get_nn_input_shape()
            nnet, numer, denom, scaler = utils.load_processing_pipeline(
                    self._filename_pipeline_base,
                    self._is_scaling_needed,
                    nn_type=self._nn_type,
                    nn_input_shape=nn_input_shape,
                    nn_output_shape=self._num_event_types,
                    num_max_training_epochs=self._num_max_training_epochs)

            if train_more:

                # Load the training data
                X_train_raw, labels_train = utils.load_data(
                        data_filename=self._filename_train,
                        signal_col_ids=self._signal_col_ids,
                        label_col_ids=self._label_col_ids,
                        decimation_factor=self._decimation_factor)

                # Preprocess the data
                numer, denom, scaler = utils.init_preprocessors(
                        X_raw=X_train_raw,
                        freq_sampling=self._freq_sampling,
                        freq_cut_lo=self._freq_cut_lo,
                        freq_cut_hi=self._freq_cut_hi,
                        M_fir=self._M_fir,
                        artifact_threshold=self._artifact_threshold)
                if not self._is_scaling_needed:
                    scaler = None
                X_train_preproc, labels_train = utils.preprocess(
                        X_train_raw, labels_train,
                        tdfilt_numer=numer, tdfilt_denom=denom,
                        # reref_channel_id=params.REREF_CHANNEL_ID,
                        artifact_threshold=self._artifact_threshold,
                        # power=True,
                        # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
                        scaler=scaler,
                        window_size=self._window_size_decimated_in_samples,
                        nn_type=self._nn_type)

                # Train the NN
                nnet = nnutils.train_nn_from_timeseries(
                        nnet, self._nn_type,
                        X_train_preproc, labels_train,
                        self._window_size_decimated_in_samples,
                        self._num_event_types,
                        self._num_train_data_instances,
                        plot_history=False)

                # Save the pipeline
                utils.save_processing_pipeline(
                        nnet, self._nn_type, numer, denom, scaler)

        else:   # If a new net is to be created

            # Load the training data
            logging.debug('%s Loading the training data...', TAG)
            X_train_raw, labels_train = utils.load_data(
                    data_filename=self._filename_train,
                    signal_col_ids=self._signal_col_ids,
                    label_col_ids=self._label_col_ids,
                    decimation_factor=self._decimation_factor)
            logging.debug('%s X_train_raw.shape: %s', TAG, str(X_train_raw.shape))
            logging.debug('%s labels_train.shape: %s', TAG, str(labels_train.shape))
            logging.debug('%s np.sum(labels_train, axis=0): %s', TAG, str(np.sum(labels_train, axis=0).tolist()))
            logging.debug('%s Training data loaded.', TAG)

            # Preprocess the data
            numer, denom, scaler = utils.init_preprocessors(
                    X_train_raw,
                    self._freq_sampling,
                    self._freq_cut_lo,
                    self._freq_cut_hi,
                    self._M_fir,
                    artifact_threshold=self._artifact_threshold,
                    plot=False)
            if not self._is_scaling_needed:
                scaler = None
            X_train_preproc, labels_train = utils.preprocess(
                    X_train_raw, labels_train,
                    tdfilt_numer=numer, tdfilt_denom=denom,
                    # reref_channel_id=params.REREF_CHANNEL_ID,
                    artifact_threshold=self._artifact_threshold,
                    # power=True,
                    # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
                    scaler=scaler,
                    window_size=self._window_size_decimated_in_samples,
                    nn_type=self._nn_type)
            # labels_train = labels_train

            # Plot the training data
            if self._is_plot_mode_on:
                logging.debug('%s Plotting the preprocessed training data... %s', TAG, self._nn_type)
                time_axis = np.arange(X_train_preproc.shape[0])
                if 'gtec' in self._nn_type:
                    t_from = 0
                    t_to = t_from + 120 * self._freq_sampling
                    plot_cols = range(16)
                    #plot_cols = (1, 3, 9)
                    #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                    #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                    plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                    plt.plot(time_axis[t_from:t_to], 10.0*labels_train[t_from:t_to], linewidth=3, label='event')
                elif 'biosemi' in self._nn_type:
                    t_from = 20000
                    t_to = t_from + 1000 * self._freq_sampling
                    #plot_rows = (6)
                    #plot_cols = (0, 1, 2, 3, 4, 5, 6, 7)
                    #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                    #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                    plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                    plt.plot(time_axis[t_from:t_to], -300000.0*labels_train[t_from:t_to], linewidth=3, label='event')
                elif 'gal' in self._nn_type:
                    t_from = 0
                    t_to = t_from + 1000 * self._freq_sampling
                    #plot_rows = (6)
                    plot_cols = (0, 1, 2, 3, 4, 5, 6, 7)
                    #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                    #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                    plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                    plt.plot(time_axis[t_from:t_to], 10.0*labels_train[t_from:t_to], linewidth=3, label='event')
                else:
                    logging.critical('%s Unknown source make.', TAG)

                plt.legend(loc='lower right')
                plt.show()

            # Init the NN
            nn_input_shape = self.get_nn_input_shape()
            nnet, _ = nnfactory.create_nn(
                    nn_type=self._nn_type,
                    nn_input_shape=nn_input_shape,
                    nn_output_shape=self._num_event_types,
                    num_max_training_epochs=self._num_max_training_epochs)

            # Train the NN
            logging.debug('%s Training the NN...', TAG)
            nnet = nnutils.train_nn_from_timeseries(
                    nnet, self._nn_type,
                    X_train_preproc, labels_train,
                    self._window_size_decimated_in_samples,
                    self._num_event_types,
                    self._num_train_data_instances,
                    plot_history=False)
            logging.debug('%s Training the NN finished.', TAG)

            # Save the pipeline
            utils.save_processing_pipeline(
                    nnet, self._nn_type,
                    numer, denom, scaler)


        # Load the test data
        logging.debug('%s Loading the test data...', TAG)
        X_test_raw, labels_test = utils.load_data(
                data_filename=self._filename_test,
                signal_col_ids=self._signal_col_ids,
                label_col_ids=self._label_col_ids,
                decimation_factor=self._decimation_factor)
        if self._is_runtest_mode_on:
            X_test_raw = X_test_raw[0:X_test_raw.shape[0] // 4]
            labels_test = labels_test[0:labels_test.shape[0] // 4]

        # Pre-process the test data
        if not self._is_scaling_needed:
            scaler = None
        X_test_preproc, labels_test = utils.preprocess(
                X_test_raw, labels_test,
                tdfilt_numer=numer, tdfilt_denom=denom,
                #reref_channel_id=params.REREF_CHANNEL_ID,
                artifact_threshold=self._artifact_threshold,
                #power=True,
                #mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
                scaler=scaler,
                window_size=self._window_size_decimated_in_samples,
                nn_type=self._nn_type)
        #X_test_preproc =   X_test_preproc[0:40000, :]
        #labels_test = labels_test[0:40000, :]

        # Dummy set for testing
        #X_test_preproc = X_train = np.tile(np.reshape(labels_test[:, 0], (labels_test.shape[0], 1)), [1, params.NUM_CHANNELS])

        # Test the net
        batch_iter_test_valid = TimeSeriesBatchIterator(
                data=X_test_preproc, labels=None,
                nn_type=self._nn_type,
                window_size_samples=self._window_size_decimated_in_samples,
                nn_output_shape=self._num_event_types,
                batch_size=params.BATCH_SIZE)
        nnet.batch_iterator_train = None
        nnet.batch_iterator_test = batch_iter_test_valid
        indices_test = np.arange(X_test_preproc.shape[0])
        logging.debug('%s Testing the net...', TAG)
        utils.log_timestamp()
        predictions = nnet.predict_proba(indices_test)
        utils.log_timestamp()
        logging.debug('%s Predictions size: %d, %d', TAG, predictions.shape[0], predictions.shape[1])
        logging.debug('%s np.sum(predictions): %f', TAG, np.sum(predictions))

        # Find the thresholds
        #tpr_targets = (0.5, 0.5, 0.5, 0.5, 0.5, 0.5)
        tpr_targets = (0.5, 0.5)
        p_thresholds = utils.calculate_auroc(
                labels_test, predictions,
                self._event_name_list,
                tpr_targets,
                self._nn_type,
                plot=self._is_plot_mode_on)
        #p_thresholds = (0.1, 0.1, 0.1)
        logging.debug('%s p_thresholds: %s', TAG, str(p_thresholds))

        # Simulate control signal
        if self._is_control_simulation_on:
            TimeSeriesProcessor.create_control_signal(
                    labels_test, predictions, p_thresholds,
                    self._nn_type)
Ejemplo n.º 46
0
''' setting parameters according to the README '''
prob = svm_problem(train_labels, train_images)
param = svm_parameter('-q')
param_best = svm_parameter('-c 32 -g 0.0078125 -q')
param_linear = svm_parameter('-t 0 -q')
param_poly = svm_parameter('-t 1 -g 1 -q')
param_rbf  = svm_parameter('-g 0.0078125 -q')
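# for reference, the libsvm flags used above: -t selects the kernel (0 linear, 1 polynomial,
# 2 RBF, 4 precomputed), -c is the soft-margin cost, -g the kernel gamma, -q quiet mode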

model = svm_train(prob, param)


"""
''' precompute-kernel in generate by precompute-kernel.py '''
pre_train_labels, pre_train_images = svm_read_problem('../../../lab5/data/precompute-kernel-train')
pre_test_labels, pre_test_images = svm_read_problem('../../../lab5/data/precompute-kernel-test')

print('File loaded')
prob_pre = svm_problem(pre_train_labels, pre_train_images, isKernel=True)
param_pre = svm_parameter('-t 4')

model = svm_train(prob_pre, param_pre)
"""

''' get support vectors '''
n = model.get_sv_indices()
n = [i-1 for i in n]

''' draw support vectors and dots in 2D space with PCA '''
images, labels = preprocess(path='../../../lab5/data/')
pca(images, labels, special=n)
Ejemplo n.º 47
0
# preprocess the training text


from utils import preprocess

with open("raw.txt") as fin:
    text = fin.read()
    print preprocess(text)
Ejemplo n.º 48
0
def cursor_func(
        freq_sampling,
        num_signal_channels,
        num_event_types,
        window_size_in_samples):

    logging.debug('%s cursor_func(.) entered.', TAG)

    len_padding = 5 * freq_sampling

    cursor_radius = 26
    w = 2 * math.pi / 10

    # Initialize the time-domain filter
    #numer, denom = get_time_domain_filters(8.0, 12.0, 0.5)

    # Init the NN
    if is_control_mode:
        filename_base = '../models/MIBBCI_NN_medium_bestsofar'
        filename_nn = filename_base + '.npz'
        nnet = nnutils.load_nn(nnutils.create_nn_medium, filename_nn)

    # Init the preproc stuff
    if is_control_mode:
        filename_p = filename_base + '.p'
        scaler = cPickle.load(open(filename_p, 'rb'))
        print 'Loaded scaler.mean_, scaler.var_:', scaler.mean_, scaler.var_

    # Init graphics
    win = graphics.GraphWin('Cursor', params.IMAGE_W, params.IMAGE_H)
    cursor = graphics.Circle(graphics.Point(params.IMAGE_W/2, params.IMAGE_H/2), cursor_radius)
    cursor.setFill(graphics.color_rgb(params.CURSOR_COLOR_REST[0], params.CURSOR_COLOR_REST[1], params.CURSOR_COLOR_REST[2]))
    cursor.setOutline(graphics.color_rgb(params.CURSOR_COLOR_REST[0], params.CURSOR_COLOR_REST[1], params.CURSOR_COLOR_REST[2]))
    cursor.draw(win)
    cursor_pos_prev = np.array([params.IMAGE_W/2, params.IMAGE_H/2])
    cursor_pos = cursor_pos_prev

    # Init event labels
    event_arr_right = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_right[:, params.EVENT_ID_RH] = np.ones(params.LEN_DATA_CHUNK_READ)
    event_arr_left = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_left[:, params.EVENT_ID_LH] = np.ones(params.LEN_DATA_CHUNK_READ)
    event_arr_idle = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_idle[:, params.EVENT_ID_IDLE] = np.ones(params.LEN_DATA_CHUNK_READ)
    #event_arr_calib = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    #event_arr_calib[:, 3] = np.ones(params.LEN_DATA_CHUNK_READ)
    cursor_event_list = []
    cursor_color_arr_raw = np.zeros((int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ), 3))
    color_counter = 0
    for i in range(int(params.LEN_IDLE_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_IDLE
        cursor_event_list.append(event_arr_idle)      # r, l, idle, calib
        color_counter += 1
    for i in range(int(params.LEN_RIGHT_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_RIGHT
        cursor_event_list.append(event_arr_right)
        color_counter += 1
    for i in range(int(params.LEN_IDLE_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_IDLE
        cursor_event_list.append(event_arr_idle)
        color_counter += 1
    for i in range(int(params.LEN_LEFT_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_LEFT
        cursor_event_list.append(event_arr_left)
        color_counter += 1
    conv_window = np.ones((params.LEN_COLOR_CONV_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ, 1))\
                  / (1 * int(params.LEN_COLOR_CONV_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ))
    cursor_color_arr_ud = np.flipud(cursor_color_arr_raw)
    cursor_color_arr_ud_convd = signal.convolve(cursor_color_arr_ud.T, conv_window.T).T
    cursor_color_arr_final = np.flipud(cursor_color_arr_ud_convd[0:cursor_color_arr_raw.shape[0], :])
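    # cursor_color_arr_final is the idle/right/idle/left colour schedule smoothed with a short
    # moving-average window (applied on the time-reversed array), so the cue colour starts
    # fading in slightly before each event block rather than switching abruptly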
    if False:
        plt.figure()
        plt.plot(cursor_color_arr_raw)
        #plt.plot(cursor_color_arr_ud[:, 0])
        #plt.plot(cursor_color_arr_ud_convd[:, 0])
        plt.plot(cursor_color_arr_final)
        #plt.legend(['raw', 'ud', 'ud_convd', 'final'])
        plt.show()

    # Initialize the amplifier
    if not is_simulation_mode:
        print 'Initializing the amp...'
        recorder = Recorder('lslamp', freq_sampling, params.LEN_REC_BUF_SEC, num_signal_channels)
        thread_rec = threading.Thread(target=recorder.record)
        thread_rec.start()

    # Cursor control loop
    X_raw_buf_live = np.zeros((int(freq_sampling*params.LEN_REC_BUF_SEC), num_signal_channels))
    label_buf_live = np.zeros((int(freq_sampling*params.LEN_REC_BUF_SEC), num_event_types))
    counter = 0
    #while True:
    while counter < (params.LEN_REC_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ):
        print 'counter: ', counter

        # Clear the canvas
        win.delete('all')

        if not is_simulation_mode:
            # Wait for new data and get it
            data_last_chunk = recorder.get_new_data(params.LEN_DATA_CHUNK_READ, params.AMP_WAIT_SEC)
            recorder.acknowledge_new_data()
            print 'recorder.new_data_counter:', recorder.new_data_counter
        else:
            time.sleep(1.0 / (freq_sampling/params.LEN_DATA_CHUNK_READ))
            data_last_chunk = 1000.0 * np.random.rand(int(params.LEN_DATA_CHUNK_READ), num_signal_channels)
            #print 'Random data_last_chunk size:', data_last_chunk

        # Insert the new sample into our time series
        i_row_lb = int((counter+len_padding)*params.LEN_DATA_CHUNK_READ)
        i_row_ub = int((counter+len_padding+1)*params.LEN_DATA_CHUNK_READ)
        X_raw_buf_live[i_row_lb:i_row_ub, :] = data_last_chunk
        #print 'data_last_chunk:', data_last_chunk
        label_buf_live[i_row_lb:i_row_ub, :]\
                = cursor_event_list[counter % int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)]

        # Calculating cursor step
        i_row_ub = int((counter+len_padding+1)*params.LEN_DATA_CHUNK_READ)
        i_row_lb = i_row_ub - int(window_size_in_samples)
        if i_row_lb >= 0:
            #print 'i_row_lb, i_row_ub:', i_row_lb, i_row_ub
            #print 'X_raw_buf_live[i_row_lb:i_row_ub, :].shape:', X_raw_buf_live[i_row_lb:i_row_ub, :].shape
            if is_control_mode:
                X_window = utils.preprocess(X_raw_buf_live[i_row_lb:i_row_ub, :], scaler)
                X_in = TimeSeriesBatchIterator.create_X_instance(X_window, conv_dim=1)
                X_in = X_in.reshape(1, X_in.shape[0], X_in.shape[1])
                #print 'X_window.shape:', X_window.shape
                #print 'X_in.shape:', X_in.shape
                cursor_step = calc_cursor_step(nnet, X_in.astype(np.float32))
            else:
                #X_window = X_raw_buf_live[i_row_lb:i_row_ub, :]
                cursor_step = 0

            cursor_pos = cursor_pos_prev + np.array([cursor_step, 0])
            #print 'cursor_pos: ', cursor_pos

        else:
            cursor_pos = cursor_pos_prev

        cursor_pos_point = graphics.Point(cursor_pos[0], cursor_pos[1])
        cursor_pos_prev = cursor_pos
        cursor = graphics.Circle(cursor_pos_point, cursor_radius)
        color_temp = cursor_color_arr_final[counter % int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)]
        cursor.setFill(graphics.color_rgb(color_temp[0], color_temp[1], color_temp[2]))
        cursor.setOutline(graphics.color_rgb(color_temp[0], color_temp[1], color_temp[2]))
        cursor.draw(win)

        counter += 1

        # End of if
    # End of while

    # Stop recording (only if the amp recorder was actually started)
    if not is_simulation_mode:
        recorder.stop_recording()

    # Close the window
    win.close()

    # Cut the padding from the data
    i_row_lb = int(len_padding * params.LEN_DATA_CHUNK_READ)
    i_row_ub = int((counter+len_padding)*params.LEN_DATA_CHUNK_READ)
    X_raw_buf_cut = X_raw_buf_live[i_row_lb:i_row_ub, :]
    label_buf_cut = label_buf_live[i_row_lb:i_row_ub, :]

    # Save data to file
    time_axis = np.arange(X_raw_buf_cut.shape[0]).reshape((X_raw_buf_cut.shape[0], 1))
    print 'time_axis.shape:', time_axis.shape
    data_merged = np.concatenate((time_axis, X_raw_buf_cut, label_buf_cut), axis=1)
    print 'data_merged.shape: ', data_merged.shape
    time_save = datetime.now()
    np.savetxt('../data/MIBBCI_REC_{0}Hz_{1}{2:02}{3:02}_{4:02}h{5:02}m{6:02}s_RAW.csv'.format(
            int(freq_sampling),
            time_save.year, time_save.month, time_save.day,
            time_save.hour, time_save.minute, time_save.second),
            X=data_merged, fmt='%.8f', delimiter=",",
            header='time, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, red, blue, idle',
            comments='')


    print 'cursor_func(.) terminates.'
Ejemplo n.º 49
0
    y = "year"
    aff = "affiliation"
    affn = "aff_nullable"
    numa = "number_of_authors"
    coa = "coauthors"
    cit = "cit_count"

    s = json.load(open(config_file))
    conn_str = ('driver=%s; server=%s; uid=%s; pwd=%s; db=%s' %
                (s['driver'], s['server'], s['uid'], s['pwd'], s['db']))
    conn = pyodbc.connect(conn_str)
    """
    make the pairs
    """
    df = pd.io.sql.read_sql(select1, conn)
    df[ab] = df[ab].map(lambda s : " ".join([utils.preprocess(x) for x in json.loads(s).itervalues()]))
    df[ti] = df[ti].map(lambda x: utils.preprocess(x))
    # see
    # http://stackoverflow.com/questions/13446480/python-pandas-remove-entries-based-on-the-number-of-occurrences#comment18556837_13447176
    # for a better way?
    counts = df.groupby(un).size()
    counts = counts[counts != 1]
    df = df[df[un].isin(counts.index.values)]
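    # (a possibly cleaner equivalent: df = df.groupby(un).filter(lambda g: len(g) > 1))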
    cursor = conn.cursor()
    df[coa] = df.apply(lambda x: utils.query_coauths(cursor, int(x[pmid]), int(x[id])), axis=1)['pmid']
    cursor.close()


    ungroup = df.groupby(un)
    bases_idx = []
    matches_idx = []
Ejemplo n.º 50
0
def main():
    # parse arguments
    args=parse_args()

    if args.cpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    sess = tf.Session()

    if args.is_train:
        # read tfrecord files
        glob_pattern=os.path.join(args.dataset_dir, '*.tfrecord')
        tfrecords_list = glob.glob(glob_pattern)

        # check dirs
        if not os.path.exists(args.checkpoint_dir):
            os.makedirs(args.checkpoint_dir)
        if not os.path.exists(args.logs_dir):
            os.makedirs(args.logs_dir)

        model=MobileNetV2(sess=sess, tf_files=tfrecords_list, num_sampes=args.num_samples,
                      epoch=args.epoch, batch_size=args.batch_size,
                      image_height=args.image_height, image_width=args.image_width,
                      n_classes=args.n_classes,
                      is_train=args.is_train, learning_rate=args.learning_rate,
                      lr_decay=args.lr_decay,beta1=args.beta1,
                      chkpt_dir=args.checkpoint_dir, logs_dir=args.logs_dir,
                      model_name=args.model_name, rand_crop=args.rand_crop)
        model._build_train_graph()
        model._train()
    else:
        model=MobileNetV2(sess=sess, tf_files='', num_sampes=args.num_samples,
                      epoch=args.epoch, batch_size=args.batch_size,
                      image_height=args.image_height, image_width=args.image_width,
                      n_classes=args.n_classes,
                      is_train=args.is_train, learning_rate=args.learning_rate,
                      lr_decay=args.lr_decay,beta1=args.beta1,
                      chkpt_dir=args.checkpoint_dir, logs_dir=args.logs_dir,
                      model_name=args.model_name, rand_crop=args.rand_crop)
        model._build_test_graph()
        saver=tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            saver.restore(sess, os.path.join(args.checkpoint_dir, ckpt_name))
            print("[*] Success to read {}".format(ckpt_name))
        else:
            print("[*] Failed to find a checkpoint")
            return

        # get input and output tensors from graph
        graph = tf.get_default_graph()
        input_x = graph.get_tensor_by_name("input:0")
        input_y = graph.get_tensor_by_name("label:0")
        prob = graph.get_tensor_by_name("mobilenetv2/prob:0")

        # prepare eval/test data and label
        img=imread('data/test/t_1_0.jpeg')
        img = imresize(img, (args.image_height, args.image_width))
        img=preprocess(img)
        print(img.dtype)
        label=1
        feed_dict={input_x:[img],input_y:[label]} # use [], because we need 4-D tensor

        start=time.time()
        res=sess.run(prob, feed_dict=feed_dict)[0] # index 0 for batch_size
        print('prob: {}, class: {}'.format(res, np.argmax(res)))
        print('time: {}'.format(time.time()-start))
Ejemplo n.º 51
0
def train(train_prefix_dir="/data/heart"):
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading training data...')
    X, y = load_train_data(train_prefix_dir)

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    # Create Image Augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Create model checkpointers for systole and diastole
    systole_checkpointer_best = ModelCheckpoint(filepath="weights_systole_best.hdf5", verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(filepath="weights_diastole_best.hdf5", verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(filepath="weights_systole.hdf5", verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(filepath="weights_diastole.hdf5", verbose=1, save_best_only=False)

    # Create 600-dimensional y cdfs from observations
    y_syst_train = np.array([(i < np.arange(600)) for i in y_train[:, 0]], dtype=np.uint8)
    y_syst_test = np.array([(i < np.arange(600)) for i in y_test[:, 0]], dtype=np.uint8)
    y_diast_train = np.array([(i < np.arange(600)) for i in y_train[:, 1]], dtype=np.uint8)
    y_diast_test = np.array([(i < np.arange(600)) for i in y_test[:, 1]], dtype=np.uint8)
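    # note these targets are the 600-dimensional step CDFs themselves (entry n is 1 once the
    # true volume is below n ml), so the models here learn the CDF directly instead of a
    # single volume value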

    print('Fitting Systole Shapes')
    hist_systole = model_systole.fit_generator(datagen.flow(X_train, y_syst_train, batch_size=batch_size),
                                               samples_per_epoch=X_train.shape[0],
                                               nb_epoch=nb_iter, show_accuracy=False,
                                               validation_data=(X_test, y_syst_test),
                                               callbacks=[systole_checkpointer, systole_checkpointer_best],
                                               nb_worker=1)
    
    print('Fitting Diastole Shapes')
    hist_diastole = model_diastole.fit_generator(datagen.flow(X_train, y_diast_train, batch_size=batch_size),
                                                 samples_per_epoch=X_train.shape[0],
                                                 nb_epoch=nb_iter, show_accuracy=False,
                                                 validation_data=(X_test, y_diast_test),
                                                 callbacks=[diastole_checkpointer, diastole_checkpointer_best],
                                                 nb_worker=1)
   
    loss_systole = hist_systole.history['loss'][-1]
    loss_diastole = hist_diastole.history['loss'][-1]
    val_loss_systole = hist_systole.history['val_loss'][-1]
    val_loss_diastole = hist_diastole.history['val_loss'][-1]

    if calc_crps > 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file (to be later used for generating submission)
    with open('val_loss.txt', mode='w+') as f:
        f.write(str(min(hist_systole.history['val_loss'])))
        f.write('\n')
        f.write(str(min(hist_diastole.history['val_loss'])))
        
    """
Ejemplo n.º 52
0
def train():
    "training ONE model for systole and diastole "
    print('Loading and compiling models...')
    model_systole = get_model1()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    
    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')			# denoising filter
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # save test subset
    with open('y_test.txt', mode='w+') as f:
        f.write(str(y_test))
        f.write('\n')

    nb_iter = 20
    epochs_per_iter = 1
    batch_size = 32					# if too small, will converge to unreliable models
                                        # if too big, it won't fit into memory
    
    calc = 4  # Every n-th iteration (0 if not needed)
    
    # remember min val. losses (best iterations)
    min_val_loss_systole = sys.float_info.max	
							
    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)


        print('Fitting diastole/systole model...')
        hist_systole = model_systole.fit(X_train, y_train[:, :], shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data=(X_test, y_test[:, :]))

   	# 	loss function values (RMSE)
        
        loss_last = hist_systole.history['loss'][-1]			# one number for the iter
        val_loss_last = hist_systole.history['val_loss'][-1]
		
									# since hist_systole.history['loss'] returns an array
									# pick the last value with [-1]
        
        loss = hist_systole.history['loss'][:]					# all iter
        val_loss = hist_systole.history['val_loss'][:]

        
        with open('loss_last.txt', mode='a') as f:
            f.write(str(loss_last))
            f.write('\n')

        with open('val_loss_last.txt', mode='a') as f:
            f.write(str(val_loss_last))
            f.write('\n')

        with open('loss.txt', mode='a') as f:
            f.write(str(loss))
            f.write('\n')

        with open('val_loss.txt', mode='a') as f:
            f.write(str(val_loss))
            f.write('\n')

# usually accuracy = correct predictions / total predictions

# using RMSE as the loss function means that a loss value of 20
# indicates the model typically misses the true volume by about 20 ml
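# a minimal numeric illustration of that reading (values are made up):
#   y_true = np.array([120., 80., 150.]); y_pred = np.array([100., 95., 140.])
#   np.sqrt(np.mean((y_true - y_pred) ** 2))  # ~15.6, i.e. off by ~16 ml on average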

        if calc > 0 and i % calc == 0:
            print('Getting predictions...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            															# npy 1283 x 2
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            															# npy 320 x 2 
            # save predictions
            
            with open('pred_systole.txt', mode='a') as f:
                f.write(str(pred_systole))
                f.write('\n')

            with open('val_pred_systole.txt', mode='a') as f:
                f.write(str(val_pred_systole))
                f.write('\n')

   	# 	save weights so they can be loaded later
		
        print('Saving weights...')     
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)

   	# 	for best (lowest) val losses, save weights
        
        if val_loss_last < min_val_loss_systole:
            min_val_loss_systole = val_loss_last
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

   	# 	save best (lowest) val losses in file (to be later used for submission)
        
        with open('min_val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
Ejemplo n.º 53
0
def train(sequenceLabeler, data):
    sequenceLabeler.learn(preprocess([data]))
Ejemplo n.º 54
0
def predict_box(box_path, model_path, out_path, *, box_dset="/box", epoch=None, verbose=True, overwrite=False, save_confmaps=False):
    """
    Predict and save peak coordinates for a box. 

    :param box_path: path to HDF5 file with box dataset
    :param model_path: path to Keras weights file or run folder with weights subfolder
    :param out_path: path to HDF5 file to save results to
    :param box_dset: name of HDF5 dataset containing box images
    :param epoch: epoch to use if run folder provided instead of Keras weights file
    :param verbose: if True, prints some info and statistics during processing
    :param overwrite: if True and out_path exists, file will be overwritten
    :param save_confmaps: if True, saves the full confidence maps as additional datasets in the output file (very slow)
    """
    
    if verbose:
        print("model_path:", model_path)
        
    # Find model weights
    model_name = None
    weights_path = model_path
    if os.path.isdir(model_path):
        model_name = os.path.basename(model_path)
        
        weights_paths, epochs, val_losses = find_weights(model_path)
        
        if epoch is None:
            weights_path = weights_paths[np.argmin(val_losses)]
        elif epoch == "final":
            weights_path = os.path.join(model_path, "final_model.h5")
        else:
            weights_path = weights_paths[epoch]
    
    # Input data
    box = h5py.File(box_path,"r")[box_dset]
    num_samples = box.shape[0]
    if verbose:
        print("Input:", box_path)
        print("box.shape:", box.shape)

    # Create output path
    if out_path[-3:] != ".h5":
        if model_name is None:
            out_path = os.path.join(out_path, os.path.basename(box_path))
        else:
            out_path = os.path.join(out_path, model_name, os.path.basename(box_path))
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
    
    model_name = os.path.basename(model_path)

    if verbose:
        print("Output:", out_path)
    
    t0_all = time()
    if os.path.exists(out_path):
        if overwrite:
            os.remove(out_path)
            print("Deleted existing output.")
        else:
            print("Error: Output path already exists.")
            return

    # Load and prepare model
    model = keras.models.load_model(weights_path)
    model_peaks = convert_to_peak_outputs(model, include_confmaps=save_confmaps)
    if verbose:
        print("weights_path:", weights_path)
        print("Loaded model: %d layers, %d params" % (len(model.layers), model.count_params()))
        
    # Load data and preprocess (normalize)
    t0 = time()
    X = preprocess(box[:])
    if verbose:
        print("Loaded [%.1fs]" % (time() - t0))
    
    # Evaluate
    t0 = time()
    if save_confmaps:
        Ypk, confmaps = model_peaks.predict(X)
        
        # Quantize
        confmaps_min = confmaps.min()
        confmaps_max = confmaps.max()
        confmaps = (confmaps - confmaps_min) / (confmaps_max - confmaps_min)
        confmaps = (confmaps * 255).astype('uint8')

        # Reshape
        confmaps = np.transpose(confmaps, (0, 3, 2, 1))
    else:
        Ypk = model_peaks.predict(X)
    prediction_runtime = time() - t0
    if verbose:
        print("Predicted [%.1fs]" % prediction_runtime)
    
    # Save
    t0 = time()
    with h5py.File(out_path, "w") as f:
        f.attrs["num_samples"] = num_samples
        f.attrs["img_size"] = X.shape[1:]
        f.attrs["box_path"] = box_path
        f.attrs["box_dset"] = box_dset
        f.attrs["model_path"] = model_path
        f.attrs["weights_path"] = weights_path
        f.attrs["model_name"] = model_name

        ds_pos = f.create_dataset("positions_pred", data=Ypk[:,:2,:].astype("int32"), compression="gzip", compression_opts=1)
        ds_pos.attrs["description"] = "coordinate of peak at each sample"
        ds_pos.attrs["dims"] = "(sample, [x, y], joint) === (sample, [column, row], joint)"

        ds_conf = f.create_dataset("conf_pred", data=Ypk[:,2,:].squeeze(), compression="gzip", compression_opts=1)
        ds_conf.attrs["description"] = "confidence map value in [0, 1.0] at peak"
        ds_conf.attrs["dims"] = "(sample, joint)"

        if save_confmaps:
            ds_confmaps = f.create_dataset("confmaps", data=confmaps, compression="gzip", compression_opts=1)
            ds_confmaps.attrs["description"] = "confidence maps"
            ds_confmaps.attrs["dims"] = "(sample, channel, width, height)"
            ds_confmaps.attrs["range_min"] = confmaps_min
            ds_confmaps.attrs["range_max"] = confmaps_max

        total_runtime = time() - t0_all
        f.attrs["total_runtime_secs"] = total_runtime
        f.attrs["prediction_runtime_secs"] = prediction_runtime
    
    if verbose:
        print("Saved [%.1fs]" % (time() - t0))

        print("Total runtime: %.1f mins" % (total_runtime / 60))
        print("Performance: %.3f FPS" % (num_samples / total_runtime))
Ejemplo n.º 55
0
Archivo: main.py Proyecto: salceson/PJN
    exit(1)


if __name__ == '__main__':
    if len(sys.argv) < 3:
        _usage(sys.argv)
    action = sys.argv[1]
    k = 0
    if action not in _ACTIONS:
        _usage(sys.argv)
    try:
        k = int(sys.argv[2])
    except ValueError:
        _usage(sys.argv)
    if action == 'preprocess':
        preprocess(_DATA_FILE, encoding=_ENCODING, k=k)
    if action == 'graph':
        print("Loading graphs...")
        with open('data/graphs_%d.dat' % k, 'rb') as f:
            graphs = pickle.loads(f.read())
        while True:
            try:
                index = int(input('Enter note number (ctrl+d to end program): '))
                g = graphs[index]
                print("Writing image to out.png...")
                g.draw('out.png')
                print("Done")
                print()
            except (ValueError, KeyError):
                continue
            except (KeyboardInterrupt, EOFError):
Ejemplo n.º 56
0
        # Load the pipeline
        filename_base = '../models/MIBBCI_NN_20160406_14h17m59s'
        nnet, numer, denom, scaler = utils.load_pipeline(filename_base)

        if is_net_to_train_more:

            # Load the training data
            X_train_raw, labels_train = utils.load_data(data_filename_train_list)

            # Preprocess the data
            numer, denom, scaler = utils.init_preprocessors(X_train_raw)
            X_train_preproc, labels_train = utils.preprocess(
                X_train_raw, labels_train,
                decimation_factor=params.DECIMATION_FACTOR_PREPROC,
                tdfilt_numer=numer, tdfilt_denom=denom,
                # reref_channel_id=params.REREF_CHANNEL_ID,
                # power=True,
                # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
                scaler=scaler)
            # labels_train = labels_train

            # Epoch the data
            X_epoch_list_train_rh = utils.create_epochs(
                    X_train_preproc, labels_train[:, 0], params.EPOCH_OFFSET_SAMPLES)
            X_epoch_list_train_lh = utils.create_epochs(
                    X_train_preproc, labels_train[:, 1], params.EPOCH_OFFSET_SAMPLES)
            X_epoch_list_train_all = []
            X_epoch_list_train_all.extend(X_epoch_list_train_rh)
            X_epoch_list_train_all.extend(X_epoch_list_train_lh)
            label_list_train_all = []
            label_list_train_all.extend([1.0, 0.0] * len(X_epoch_list_train_rh))
Ejemplo n.º 57
0
def train():
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter,
                                           batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

    
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

    
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
Ejemplo n.º 58
0
            labels.append(label)
            cluster[label] += d.real
            count[label] += 1
        print('Each cluster:', count)
        
        ''' Update centers and check whether it has converged '''
        cluster = [cluster[i] / float(count[i]) for i in range(K)]
        if np.array_equal(center, cluster):
            break
        center = cluster
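        # (np.array_equal is a strict stopping rule; np.allclose would tolerate small float noise)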

    return labels

print('Fetching data ...')
t = time()
X_train, T_train = preprocess()
table = dot_table(X_train, X_train, 'dot_table.npy')
print('Time:', time() - t)

if LOAD and os.path.isfile('w.npy') and os.path.isfile('v.npy'):
    print('Loading eigenvector ...')
    t = time()
    w = np.load(open('w.npy', 'rb'))
    v = np.load(open('v.npy', 'rb'))
    print('Time:', time() - t)

else:
    kernel = {'linear':linear, 'rbf':rbf, 'linearbf':linearbf}[KERNEL]
    print('Generating L ...')
    t = time()
    N = len(table)
Ejemplo n.º 59
0
def getSVC(df, random_split=None):
    X, Y = to_array(df.drop("validation", axis=1))
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    tr_ind = df[df["validation"]==0].index.values.astype(int)
    val_ind = df[df["validation"]==1].index.values.astype(int)
    custom_CV_iterator = [(tr_ind, val_ind)]
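    # GridSearchCV's cv argument accepts an explicit list of (train_indices, test_indices)
    # pairs, so this single pair reuses the fixed validation split instead of k-fold CV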
    print("Create a Random Forest Classifier")
    print("__Parameter searching...")
    # TODOs: cross-validation for best hyper parameter
    clf = GridSearchCV(SVC(probability=False),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10, 
                       verbose=5,
                       cv=custom_CV_iterator
                      )
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))
    return clf, scaler


if __name__ == "__main__":
    output_fname = sys.argv[1] 
    df, test_df = preprocess()
    model, scaler = getSVC(df)
    write_ans(model, test_df, ofname=output_fname, scaler=scaler)