Python preprocess 예제들, data.preprocess Python 예제들

예제 #1

0

파일 보기

    def fit(self,
            patches,
            label_input='prob',
            batch_size=8,
            n_epochs=10,
            valid_patches=None,
            valid_label_input='prob',
            **kwargs):
        """Train ``self.model`` on preprocessed patches.

        Args:
            patches: Training patches, handed to ``preprocess``.
            label_input: Label format forwarded to ``preprocess`` for the
                training patches.
            batch_size: Batch size passed to ``self.model.fit``.
            n_epochs: Passed to ``self.model.fit`` as ``epochs``.
            valid_patches: Optional validation patches. When given they are
                preprocessed, stored on ``self.valid_score_callback`` and
                passed to ``self.model.fit`` as ``validation_data``.
            valid_label_input: Label format forwarded to ``preprocess`` for
                the validation patches.
            **kwargs: Extra keyword arguments forwarded to ``self.model.fit``.
        """
        # makedirs also creates missing parent directories, and exist_ok
        # tolerates another process creating the directory between the
        # existence check and the creation call.
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path, exist_ok=True)

        X, y = preprocess(patches,
                          n_classes=self.n_classes,
                          label_input=label_input)

        validation_data = None
        if valid_patches is not None:
            validation_data = preprocess(valid_patches,
                                         n_classes=self.n_classes,
                                         label_input=valid_label_input)
            self.valid_score_callback.valid_data = validation_data

        self.model.fit(x=X,
                       y=y,
                       batch_size=batch_size,
                       epochs=n_epochs,
                       verbose=1,
                       callbacks=self.call_backs + [self.valid_score_callback],
                       validation_data=validation_data,
                       **kwargs)

예제 #2

0

파일 보기

def cascade4(filenames, debug = False):
    """Run a read -> preprocess -> map -> reduce pipeline over four processes.

    The calling process reads every line from ``filenames`` and forks three
    workers connected by queues: one preprocesses lines, one maps the
    preprocessed items, and one reduces the mapped items. ``None`` is used as
    the end-of-stream marker on every queue.

    Args:
        filenames: Files handed to ``data.extractData``.
        debug: When true, print a banner and a summary of the reduced result.

    Returns:
        None. The reduced key/value pairs are only reported through the debug
        print.
    """
    if debug:
        print("Cascade of 4 processes with queues. read->process->map->reduce")
    lines = [item for fn in filenames for item in data.extractData(fn)]
    finalDict = {}
    masterq = multiprocessing.Queue()
    lpq = multiprocessing.Queue()
    mapq = multiprocessing.Queue()
    redq = multiprocessing.Queue()

    # Each worker ends with os._exit(0), so the code after its branch is only
    # reached by the parent; no nested else-branches are needed.
    lineProc = os.fork()
    if lineProc == 0:
        for toProcess in iter(lpq.get, None):
            # Preprocess once and reuse the result instead of calling
            # data.preprocess a second time for the same line.
            processed = data.preprocess(toProcess)
            if processed is not None:
                for item in processed:
                    if len(item) > 0:
                        mapq.put(item)
        mapq.put(None)
        # Short pause before the hard exit; the original relies on it,
        # presumably so queued items are flushed first.
        time.sleep(0.3)
        os._exit(0)

    mapProc = os.fork()
    if mapProc == 0:
        for toMap in iter(mapq.get, None):
            for item in mapper.map(toMap):
                if len(item) > 0:
                    redq.put(item)
        redq.put(None)
        time.sleep(0.2)
        os._exit(0)

    reducProc = os.fork()
    if reducProc == 0:
        r = myReducer.reducer()
        for toReduce in iter(redq.get, None):
            masterq.put(r.onlineReduce(toReduce))
        masterq.put(None)
        time.sleep(0.1)
        os._exit(0)

    # Parent: feed the pipeline, then collect the reducer's running results.
    for l in lines:
        lpq.put(l)
    lpq.put(None)
    for k, v in iter(masterq.get, None):
        finalDict[k] = v

    if debug:
        # dict.items() works on both Python 2 and 3 (iteritems is 2-only).
        entries = list(finalDict.items())
        print("\t{} files processed. Dictionary of {} instances of {} words made".format(len(filenames), len(entries), sum([v for _, v in entries])))

    # Reap all three workers; a single os.wait() collected only one of them
    # and left the other two as zombies.
    for pid in (lineProc, mapProc, reducProc):
        os.waitpid(pid, 0)
    return

예제 #3

0

파일 보기

def tokenize(input_sentence):
    """Preprocess a sentence and split it into whitespace-separated tokens.

    ``data.preprocess`` is called with punctuation removal and lower-casing
    switched on; the resulting string is split on whitespace.
    """
    return data.preprocess(
        input_sentence, remove_punct=True, lower_case=True).split()

예제 #4

0

파일 보기

파일: model.py 프로젝트: Aneesh1212/NLP2019

def train(train_csv):
    """Fit a freshly generated model on the data in ``train_csv``.

    Args:
        train_csv: Path handed to ``read_csv``.

    Returns:
        The model returned by ``model_generate`` after fitting for 10 epochs.
    """
    data = read_csv(train_csv)
    dict_data = preprocess(data)
    X, Y = convert_to_input(dict_data)
    model = model_generate()
    # The original trained and returned ``modela``, a name never assigned in
    # this function, while the model built above went unused.
    model.fit(X, Y, epochs=10)
    return model

예제 #5

0

파일 보기

def train(options):
    """Train a model on the training split and score it on the validation split.

    Both splits go through ``preprocess`` with ``options.normalize``. The
    model is optionally saved to ``options.save_model``.

    Returns:
        The result of ``get_binary_class_scores`` on the validation labels and
        the model's validation predictions.
    """
    train_x, train_y = preprocess(load_train(), normalize=options.normalize)
    val_x, val_y = preprocess(load_val(), normalize=options.normalize)

    model = get_model(options, train_x.shape[1])
    model.train(train_x, train_y, val_x, val_y)

    # Persist the model only when a destination was supplied.
    if options.save_model is not None:
        model.save(options.save_model)

    # Validation scores.
    val_predictions = model.predict(val_x)
    return get_binary_class_scores(val_y, val_predictions)

예제 #6

0

파일 보기

파일: drive.py 프로젝트: Labonneguigue/CarND-Behavioral-Cloning

def telemetry(sid, data):
    """Handle one telemetry message: predict steering and send a control command.

    With an empty/falsy ``data`` a ``'manual'`` event is emitted instead.
    Otherwise the camera frame is decoded, preprocessed and fed to ``model``;
    the throttle comes from ``controller``. When ``args.image_folder`` is set
    the raw frame is also saved as a timestamped JPEG.
    """
    if not data:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Values reported by the simulator: steering angle, throttle, speed and
    # the base64-encoded frame from the center camera.
    steering_angle = data["steering_angle"]
    throttle = data["throttle"]
    speed = data["speed"]
    encoded_frame = data["image"]

    image = Image.open(BytesIO(base64.b64decode(encoded_frame)))
    # The frame must be preprocessed the same way as the training images.
    model_input = preprocess(np.asarray(image))

    prediction = model.predict(model_input[None, :, :, :], batch_size=1)
    steering_angle = float(prediction)
    throttle = controller.update(float(speed))

    print(steering_angle, throttle)
    send_control(steering_angle, throttle)

    # Save the frame when an output folder was configured.
    if args.image_folder != '':
        stamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
        target = os.path.join(args.image_folder, stamp)
        image.save('{}.jpg'.format(target))

예제 #7

0

파일 보기

파일: dialog.py 프로젝트: HaarlemTHU/chattingbot

def predict(sentence):
    """Decode a reply for ``sentence`` with the encoder/decoder pair.

    The latest checkpoint under ``hparams.ckpt_dir`` is restored, the sentence
    is preprocessed and mapped to word indices (unknown words map to 3), and
    the decoder is run greedily for at most ``max_length_target`` steps or
    until it emits ``'end'``.

    Returns:
        The predicted words concatenated with every space removed.
    """
    checkpoint.restore(tf.train.latest_checkpoint(hparams.ckpt_dir))
    cleaned = preprocess(sentence)

    token_ids = [input_token.word_index.get(word, 3)
                 for word in cleaned.split(' ')]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=max_length_input, padding='post')
    encoder_input = tf.convert_to_tensor(padded)

    initial_state = [tf.zeros((1, hparams.units))]
    enc_out, dec_hidden = encoder(encoder_input, initial_state)
    dec_input = tf.expand_dims([target_token.word_index['start']], 0)

    pieces = []
    for _ in range(max_length_target):
        predictions, dec_hidden, _attention = decoder(
            dec_input, dec_hidden, enc_out)
        predicted_id = tf.argmax(predictions[0]).numpy()
        word = target_token.index_word[predicted_id]
        if word == 'end':
            break
        pieces.append(str(word) + ' ')
        # Feed the prediction back in as the next decoder input.
        dec_input = tf.expand_dims([predicted_id], 0)

    return ''.join(pieces).replace(" ", "")

예제 #8

0

파일 보기

 def predict(self, patches, label_input='prob'):
     """Return softmax scores from ``self.model`` for the given patches.

     Args:
         patches: Either a list (or list subclass) of patches, which is run
             through ``preprocess`` first, or a ``np.ndarray`` that is fed to
             the model as is.
         label_input: Label format forwarded to ``preprocess`` for list input.

     Returns:
         The model output with a softmax applied over the last axis.

     Raises:
         ValueError: If ``patches`` is neither a list nor an ndarray.
     """
     # isinstance also accepts subclasses, which the exact ``__class__ is``
     # comparison rejected.
     if isinstance(patches, list):
         X, _ = preprocess(patches, label_input=label_input)
         y_pred = self.model.predict(X)
     elif isinstance(patches, np.ndarray):
         y_pred = self.model.predict(patches)
     else:
         raise ValueError("Input format not supported")
     return scipy.special.softmax(y_pred, -1)

예제 #9

0

파일 보기

파일: main.py 프로젝트: cxrasdfg/my_faster_rcnn

def test_net():
    """Run the detector on ``./dog.jpg`` and display the predicted boxes.

    The weights come from the last checkpoint reported by
    ``get_check_point``. Detections whose probability exceeds
    ``cfg.out_thruth_thresh`` are drawn on the image and shown.

    Raises:
        ValueError: If the checkpoint file does not exist.
        IOError: If the test image cannot be read.
    """
    data_set = TestDataset()
    classes = data_set.classes
    net = MyNet(classes)
    _, _, last_time_model = get_check_point()
    # assign directly
    # last_time_model='./weights/weights_21_110242'

    if not os.path.exists(last_time_model):
        raise ValueError("no model existed...")
    # Both arms of the former ``cfg.test_use_offline_feat`` branch loaded the
    # same state dict, so the branch is collapsed.
    net.load_state_dict(torch.load(last_time_model))
    print("Using the model from the last check point:`%s`" %
          (last_time_model))

    net.eval()
    is_cuda = cfg.use_cuda
    did = cfg.device_id
    # img_src=cv2.imread("/root/workspace/data/VOC2007_2012/VOCdevkit/VOC2007/JPEGImages/000012.jpg")
    # img_src=cv2.imread('./example.jpg')
    img_src = cv2.imread('./dog.jpg')  # BGR
    if img_src is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("could not read image './dog.jpg'")
    img = img_src[:, :, ::-1]  # RGB
    h, w, _ = img.shape
    img = img.transpose(2, 0, 1)  # [c,h,w]

    img = preprocess(img)
    img = img[None]
    img = torch.tensor(img)
    if is_cuda:
        net.cuda(did)
        img = img.cuda(did)
    boxes, labels, probs = net(img, torch.tensor([[w, h]]).type_as(img))[0]

    # Keep only detections above the configured probability threshold.
    prob_mask = probs > cfg.out_thruth_thresh
    boxes = boxes[prob_mask]
    labels = labels[prob_mask].long()
    probs = probs[prob_mask]
    captions = [
        classes[label] + '[%.3f]' % (prob) for label, prob in zip(labels, probs)
    ]
    draw_box(img_src, boxes, color='pred', text_list=captions)
    show_img(img_src, -1)

예제 #10

0

파일 보기

def main():
    """Train a WGAN on the non-functional features of the selected attack class.

    The training and validation data go through the same split/preprocess
    pipeline. The model is optionally saved under
    ``options.save_model/options.name``.
    """
    options = parse_arguments()

    def build_split(raw):
        # Only the non-functional feature groups (attack and normal) are used.
        _, attack_nff, _, benign_nff = split_features(
            raw, selected_attack_class=options.attack)
        attack_attributes, attack_labels = preprocess(
            attack_nff, normalize=options.normalize)
        benign_attributes, benign_labels = preprocess(
            benign_nff, normalize=options.normalize)
        split = (benign_attributes, attack_attributes,
                 benign_labels, attack_labels)
        return split, attack_attributes.shape[1]

    trainingset, _ = build_split(load_train())
    # The attribute count handed to the model is the validation split's.
    validationset, n_attributes = build_split(load_val())

    model = WGAN(options, n_attributes)
    model.train(trainingset, validationset)

    # save model
    if options.save_model is not None:
        target_dir = os.path.join(options.save_model, options.name)
        os.makedirs(target_dir, exist_ok=True)
        model.save(target_dir)

예제 #11

0

파일 보기

def branching(filenames, debug = False):
    """Extract, preprocess, map and reduce the input across four forked processes.

    Process ``a`` (the caller) forks ``b``; each of them then forks once more
    (``a`` -> ``c``, ``b`` -> ``d``). The file list is halved between ``a``
    and ``b``, and each of them halves its extracted lines with its own child.
    Every process reduces its share independently.

    Args:
        filenames: Files to process; split with ``chunkList``.
        debug: When true, print a banner and one summary line per process.

    Returns:
        None. Only the original process returns; children end with
        ``os._exit(0)``.
    """
    if debug:
        # "\\ " yields the same backslash-space the original invalid escape
        # "\ " produced, without the invalid-escape warning.
        print("Data Extraction Split Randomly Over 4 processes:\n    a  \n   / \\ \n  b   c\n /\nd")

    # Draw the random half BEFORE forking. Since Python 3.7 the child's RNG is
    # reseeded after fork, so drawing afterwards could give both processes the
    # same half.
    split1 = chunkList(filenames)
    c = random.choice([0,1])
    outref = os.fork()
    if outref == 0:
        message = "\tb"
        fns = split1[c]
    else:
        message = "\ta"
        fns = split1[1 - c]

    # Track the last file name explicitly; the debug print used to rely on the
    # Python-2-only leak of a list-comprehension variable.
    lines = []
    lastFn = None
    for lastFn in fns:
        lines.extend(data.extractData(lastFn))

    split2 = chunkList(lines)
    cc = random.choice([0,1])
    inref = os.fork()
    if inref == 0:
        dlines = split2[cc]
        message = "\tc"
        if outref == 0:
            message = "\td"
    else:
        dlines = split2[1 - cc]

    prep = [item
            for sublist in [data.preprocess(d) for d in dlines]
            if sublist is not None
            for item in sublist]
    mapd = [item for l in prep for item in mapper.map(l)]
    r = myReducer.reducer()
    for d in mapd:
        r.reduce(d)
    if debug:
        # dict.items() works on both Python 2 and 3 (iteritems is 2-only).
        print(message + "({})".format(os.getpid()) + ": ({}|{})".format(lastFn, len(list(r.dictionary.items()))))

    if(inref == 0):
        os._exit(0)
    else:
        os.wait()
        if(outref == 0):
            os._exit(0)
        else:
            # Process ``a`` has a second direct child to reap.
            os.wait()
    return

예제 #12

0

파일 보기

파일: drive.py 프로젝트: nuhil/deep-learning-research

def telemetry(sid, data):
    """Predict a steering angle for the incoming frame and send a control command.

    The throttle is 0.2 above a reported speed of 5 and 1.0 otherwise.
    """
    # Values reported by the simulator; steering angle and throttle are
    # replaced below by the computed commands.
    steering_angle = data["steering_angle"]
    throttle = data["throttle"]
    speed = data["speed"]
    encoded_frame = data["image"]

    # Decode the center-camera frame and add a leading batch axis.
    frame = Image.open(BytesIO(base64.b64decode(encoded_frame)))
    batch = preprocess(np.asarray(frame))[None, :, :, :]

    steering_angle = float(model.predict(batch, batch_size=1))
    if float(speed) > 5:
        throttle = .2
    else:
        throttle = 1.
    print(steering_angle, throttle)
    send_control(steering_angle, throttle)

예제 #13

0

파일 보기

    def train_model(self):
        """Load MNIST, split it with ``preprocess`` and train on the labeled part.

        The test split is stored on ``self.x_test`` / ``self.y_test``; the
        unlabelled split is not used here.
        """
        proportion_labeled = 0.1
        assert proportion_labeled == 0.1

        mnist_arrays = get_mnist_np(root='./data', download=True)
        train_data_np, train_labels_np, test_data_np, test_labels_np = mnist_arrays

        splits = preprocess(train_data_np=train_data_np,
                            train_labels_np=train_labels_np,
                            test_data_np=test_data_np,
                            test_labels_np=test_labels_np,
                            proportion_labeled=proportion_labeled)
        x_labeled, _x_unlabelled, x_test, y_labeled, _, _y_unlabelled, y_test = splits

        self.x_test = x_test
        self.y_test = y_test

        self.train_model_supervised(x=x_labeled,
                                    y=y_labeled,
                                    num_epochs=self.args.num_epochs)

예제 #14

0

파일 보기

파일: train_semi_supervised_with_schedule_2.py 프로젝트: arturbeg/dni.pytorch

    def train_model(self):
        """Alternate supervised and unsupervised training steps on MNIST.

        Each iteration runs one supervised step on the labeled split and one
        unsupervised step on the unlabelled split, weighted by
        ``self.unlabelled_weight_schedule``. Every 100 iterations the losses
        are printed and logged and ``self.test_model`` is called.
        """
        proportion_labeled = 0.1
        assert proportion_labeled == 0.1

        mnist_arrays = get_mnist_np(root='./data', download=True)
        train_data_np, train_labels_np, test_data_np, test_labels_np = mnist_arrays

        splits = preprocess(train_data_np=train_data_np,
                            train_labels_np=train_labels_np,
                            test_data_np=test_data_np,
                            test_labels_np=test_labels_np,
                            proportion_labeled=proportion_labeled)
        x_labeled, x_unlabelled, x_test, y_labeled, _, y_unlabelled, y_test = splits
        self.x_test = x_test
        self.y_test = y_test

        progress_fmt = 'Iteration [%d/%d], Loss: %.6f, Grad Loss: %.8f'
        weight_fmt = 'Current synthetic gradient weigth is: %.4f'

        for i in range(self.args.num_iterations):
            loss, grad_loss = self.train_model_helper(x=x_labeled,
                                                      y=y_labeled,
                                                      is_supervised=True)
            _, _ = self.train_model_helper(
                x=x_unlabelled,
                y=y_unlabelled,
                is_supervised=False,
                weight=self.unlabelled_weight_schedule(i))

            step = i + 1
            if step % 100 != 0:
                continue

            # Report to stdout and to the log with the same message.
            print(progress_fmt % (step, self.args.num_iterations,
                                  loss.item(), grad_loss.item()))
            logging.info(progress_fmt % (step, self.args.num_iterations,
                                         loss.item(), grad_loss.item()))

            if self.unlabelled_weight_schedule(i) != 0.0:
                print(weight_fmt % (self.unlabelled_weight_schedule(i)))
                logging.info(weight_fmt % (self.unlabelled_weight_schedule(i)))

            self.test_model(step)

예제 #15

0

파일 보기

def singleCore(filenames, debug = False, maxTime = 0):
    """Repeatedly rebuild an n-gram Markov model on its own samples in one process.

    The input files are extracted, preprocessed and split once. Each pass then
    builds a fresh model from the current data and replaces the data with as
    many generated samples as there were input lines. The loop always runs at
    least once and stops when more than ``maxTime`` seconds have elapsed.
    """
    if debug:
        print("One process")
    initialT = time.time()

    dlines = []
    for fn in filenames:
        dlines.extend(data.extractData(fn))
    nDatapoints = len(dlines)

    prep = [p for p in [data.preprocess(d) for d in dlines] if p is not None]
    split = [piece for line in prep for piece in data.splitify(line)]

    while True:
        nGrams = [gram for l in split for gram in markov.nGrams(l)]
        mod = markov.markovNGramModel()
        for gram in nGrams:
            mod.update(gram)
        # Next pass trains on the samples generated by this pass.
        dlines = [mod.sampleGen() for _ in range(nDatapoints)]
        split = [line.split(" ") for line in dlines]
        if time.time() - initialT > maxTime:
            break

예제 #16

0

파일 보기

def main():
    """Train the character CNN, then write metrics and predictions to disk.

    Outputs: ``weights.h5``, ``metrics.txt``, ``ytest.txt`` and
    ``ytrain.predicted.txt`` in the working directory.
    """

    def read_lines(path):
        with open(path) as handle:
            return handle.read().splitlines()

    def dump_predictions(path, values):
        with open(path, 'w') as handle:
            handle.write('\n'.join(map(str, values)))

    # read and prepare data
    prepared = data.preprocess(read_lines('data/test/xtrain.txt'),
                               read_lines('data/test/ytrain.txt'),
                               read_lines('data/test/xtest.txt'))
    xtrain, ytrain, xtest, vocab, max_len, n_classes = prepared

    # compile model
    model = compiled(char_cnn(len(vocab), max_len, n_classes))

    # tensorflow specific: always log to TensorBoard
    callbacks = [TensorBoard(write_images=True)]

    # fit model and log out to tensorboard
    history = fit(model, xtrain, ytrain, callbacks)
    model.save_weights('weights.h5')

    # evaluation
    print(history.history)
    with open('metrics.txt', 'w') as handle:
        handle.write(json.dumps(history.history, indent=1))

    # prediction on the test set
    _, ytest = predict(model, xtest)
    dump_predictions('ytest.txt', ytest)

    # training-set predictions for inspection
    _, ytrain_predicted = predict(model, xtrain)
    dump_predictions('ytrain.predicted.txt', ytrain_predicted)

예제 #17

0

파일 보기

파일: create.py 프로젝트: ZhouPeng-NIMST/Precipitation-Nowcasting-1

def extract_into_array(text='Agrimet 15min.csv', imgdir='RADAR DATA/Dataset'):
    """Pair CSV timestamps with preprocessed values and list dated image folders.

    Returns:
        A 4-tuple ``(text, images, minimum, maximum)``: ``text`` is a list of
        ``[datetime, value]`` pairs built from the first CSV column and the
        first result of ``preprocess``; ``images`` is a list of
        ``[datetime, dirpath]`` pairs for every directory below ``imgdir``,
        sorted by directory path; ``minimum`` and ``maximum`` are passed
        through from ``preprocess``.
    """
    data15, minimum, maximum = preprocess(text)

    # First CSV column holds the date/time strings.
    raw_stamps = np.genfromtxt(text, delimiter=',', dtype='string')[:, 0]
    readings = [
        [datetime.datetime.strptime(stamp, '%m/%d/%Y %H:%M'), value]
        for stamp, value in zip(raw_stamps, data15)
    ]

    images = []
    for dirpath, _dirnames, _filenames in os.walk(imgdir):
        if dirpath == imgdir:
            continue
        # Characters 21..28 of the folder name are read as a YYYYMMDD date.
        folder_date = datetime.datetime.strptime(
            dirpath.split('/')[-1][21:29], '%Y%m%d')
        images.append([folder_date, dirpath])
    images.sort(key=lambda entry: entry[1])

    return readings, images, minimum, maximum

예제 #18

0

파일 보기

# Remaining command-line options (the parser is created earlier in the
# script), followed by parsing.
parser.add_argument('--eval_batch_size',
                    type=int,
                    default=32,
                    metavar='N',
                    help='eval batch size')
parser.add_argument('--seed', type=int, default=1234, help='set random seed')
parser.add_argument('--cuda', action='store_true', help='use CUDA device')
parser.add_argument('--gpu_id', type=int, help='GPU device id used')

args = parser.parse_args()

if args.model_type == 'baseline':
    # Preprocess the dev file with a 0.3 split ratio. The .npy files loaded
    # below are presumably written by this call -- TODO confirm.
    data_path = './data/dev.txt'
    split_ratio = 0.3
    preprocess(data_path, split_ratio)

    # Load the train/test datasets and plot the training data.
    train_dataset = EmojiDataset('./data/Xtrain.npy', './data/ytrain.npy')
    plotdata(np.load('./data/Xtrain.npy', allow_pickle=True),
             np.load('./data/ytrain.npy', allow_pickle=True))
    test_dataset = EmojiDataset('./data/Xtest.npy', './data/ytest.npy')
    # Both loaders keep the dataset order (shuffle=False) and batch through
    # the shared collate_fn.
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.train_batch_size,
                                  shuffle=False,
                                  collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.eval_batch_size,
                                 shuffle=False,
                                 collate_fn=collate_fn)

예제 #19

0

파일 보기

파일: main.py 프로젝트: kevinatntu/2020-ADL-Final_News-Standpoint-Style-Transfer


# Script entry point: build the datasets for the selected task, create the
# auto-encoder and classifier models, and hand everything to a Trainer.
if __name__ == "__main__":
    args = parse_args()

    # preprocess and get word dict
    if args.task == 'task3':
        # Three-label variant: data holds nine splits (pos/neg/neutral for
        # train and dev, plus three more that are discarded here --
        # presumably the test splits, TODO confirm).
        data, label, vocab = preprocess_3label(task=args.task, lang=args.lang)
        train_pos, train_neg, train_neutral, dev_pos, dev_neg, dev_neutral, _, _, _ = data

        # build datasets
        trainset = ThreeLabelDataset(train_pos, train_neg, train_neutral, vocab, args.max_seq_length)
        valset = ThreeLabelDataset(dev_pos, dev_neg, dev_neutral, vocab, args.max_seq_length)

    else:
        data, label, vocab = preprocess(task=args.task, lang=args.lang)  # data = [train_pos, train_neg, dev_pos, dev_neg, test_pos, test_neg]
        train_pos, train_neg, dev_pos, dev_neg, _, _ = data

        # build datasets
        trainset = TFDataset(train_pos, train_neg, vocab, args.max_seq_length)
        valset = TFDataset(dev_pos, dev_neg, vocab, args.max_seq_length)


    # Use the GPU when one is available.
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ae_model = NewTransformer(vocab.size, device=args.device).to(args.device)
    cls_model = Classifier(latent_size=args.latent_size, output_size=args.label_size).to(args.device)

    # The vocabulary is attached to args before they are passed to the Trainer.
    args.vocab = vocab
    trainer = Trainer(trainset, valset, ae_model, cls_model, args)

예제 #20

0

파일 보기

def cascadeMarkovSameProcess(filenames, debug = False, maxIterations = -1, maxTime = 0):
    """Run four forked processes that pass Markov-generated data around a ring of queues.

    The caller forks twice, giving four processes (labelled a, b, c, d in the
    debug output). Each process preprocesses its share of the input and puts
    it on the next process's queue, then repeatedly builds an n-gram model
    from whatever arrives on its own queue and forwards 100 generated samples.
    ``None`` on a queue ends the receiving loop.

    Args:
        filenames: Files handed to ``data.extractData``; split between
            processes with ``chunkList``.
        debug: When true, print progress messages.
        maxIterations: When non-negative, counted down once per loop
            iteration; the stop flag is set when the counter reaches zero.
        maxTime: The stop flag is set once more than this many seconds have
            elapsed since ``initialT``.

    Returns:
        None. Only the original process returns; forked children end with
        ``os._exit(0)``.
    """
    # One list of CPU ids per process, used for cpu_affinity below.
    procs = listChunks(range(psutil.cpu_count()), 4)
    q = 0
    if debug:
        # NOTE(review): the string has no "{}" placeholder, so
        # .format(maxTime) has no effect; "\ " is also an invalid escape.
        print("4 markov models passing each other generated data in a cycle, then rebuilding on the new data:\n    a  \n   / \ \n  b   c\n /\nd".format(maxTime))
    # Queues are created before forking so all four processes share them.
    dataq = [multiprocessing.Queue() for _ in range(4)]
    outref = os.fork()
    split1 = chunkList(filenames)
    # NOTE(review): drawn after the fork; the halves are complementary only
    # if parent and child draw the same value -- confirm on the target
    # Python version (newer versions reseed the child's RNG after fork).
    c = random.choice([0,1])
    if outref == 0:
        # First child ("b"): queue index 0.
        message = "\tb"
        fns = split1[c]
        op = psutil.Process(os.getpid())
        op.cpu_affinity(procs[0])
        q = 0
#        op.nice(-10)
    else:
        # Original process ("a"): queue index 1.
        message = "\ta"
        fns = split1[1 - c]
        ip = psutil.Process(os.getpid())
        q = 1
        ip.cpu_affinity(procs[1])
#        ip.nice(-10)
    lines = [item for sublist in [list(data.extractData(fn)) for fn in fns] for item in sublist]
    split2 = chunkList(lines)
    cc = random.choice([0,1])
    inref = os.fork()
    if inref == 0:
        # Second-level child: "c" (queue 2) when forked from "a",
        # "d" (queue 3) when forked from "b".
        oip = psutil.Process(os.getpid())
        oip.cpu_affinity(procs[2])
        q = 2
#        oip.nice(-10)
        dlines = split2[cc]
        message = "\tc"
        if outref == 0:
            q = 3
            iip = psutil.Process(os.getpid())
            iip.cpu_affinity(procs[3])
            message = "\td"
    else:
        dlines = split2[1 - cc]
    ic = maxIterations
    stopCondition = False;
    nDatapoints = len(dlines)
    prep = [l for l in [data.preprocess(d) for d in dlines] if l is not None]
    split = [item for sublist in [list(data.splitify(line)) for line in prep] for item in sublist]
    # Seed the ring: hand this process's preprocessed data to the next queue.
    dataq[(q + 1) % 4].put(split)
    # Presumably gives the other processes time to finish preprocessing
    # before the timer starts -- TODO confirm.
    time.sleep(3)
    initialT = time.time()
    if debug:
        print("Data preprocessing ({}) complete, starting timer".format(q))
    for toProcess in iter(dataq[q].get, None):
        nGrams = [item for sublist in 
                    [list(markov.nGrams(l)) for l in toProcess] if
                        len(sublist) > 0
                    for item in sublist
                ]
        # A fresh model is built from each received batch.
        mod = markov.markovNGramModel()
        for d in nGrams:
            mod.update(d)
        dlines = [mod.sampleGen() for _ in range(100)]
        split = [line.split(" ") for line in dlines]
        if(stopCondition):
            # Stop flag was set on an earlier iteration: pass the
            # end-of-stream marker on instead of data.
            time.sleep(0.1)
            dataq[(q + 1) % 4].put(None)
        else:
            if(debug):
                print("{}: Sample: {}".format(q,dlines[0]))
            dataq[(q + 1) % 4].put([line.split(" ") for line in dlines])
        if(maxIterations >= 0):
            ic -= 1
        if(ic == 0 or time.time() - initialT > maxTime):
            stopCondition = True
    time.sleep(0.1)
    dataq[(q + 1) % 4].put(None)
    if debug:
        # NOTE(review): ``fn`` is only bound as a list-comprehension variable
        # above, which leaks on Python 2 but is a NameError on Python 3;
        # ``mod`` is unbound if the loop body never ran; ``iteritems`` is
        # Python-2-only.
        print(message + "({})".format(os.getpid()) + ": ({}|{})".format(fn,len(list(mod.model.iteritems()))))
    if(inref == 0):
        os._exit(0)
    else:
        os.wait()
    if(outref == 0):
        os._exit(0)
    return

예제 #21

0

파일 보기

 def preprocessing(self, text_str):
     """Turn a raw string into a list of word indices.

     The string goes through ``data.preprocess`` and
     ``text.text_to_word_sequence``; each word is then mapped with
     ``self.word_to_index``.
     """
     cleaned = data.preprocess(text_str)
     words = text.text_to_word_sequence(cleaned)
     return [self.word_to_index(word) for word in words]

예제 #22

0

파일 보기

파일: pix2pix_exportcellphone_6.py 프로젝트: yanmenglu/cellSTORM-Tensorflow

# Output tensor of the imported graph, looked up by name under the 'prefix/'
# scope.
node_output = mygraph.get_tensor_by_name('prefix/'+outputs_name+':0')

with tf.Session(graph=mygraph) as sess:
    # Build some input data for the network. Both branches below are
    # disabled with a constant 0 condition.
    
    if(0):
        # Test whether localizing from random data works: sparse random
        # peaks, blurred per image, rescaled to [0, 1] and preprocessed.
        randomstorm = np.random.randn(batch_size,256,256,1)
        randomstorm = randomstorm* (randomstorm > 0.9)
        
        for i_image in range(0, batch_size):
            randomstorm[i_image,:,:,:] = gaussian_filter(randomstorm[i_image,:,:,:], sigma=9) 
        #
        randomstorm = randomstorm-np.min(randomstorm)
        randomstorm = randomstorm/np.max(randomstorm)
        randomstorm =  data.preprocess(randomstorm)
    elif(0):
        # Test whether localizing from a TIFF works.
        mytiffile_name = 'test_if_it_works.tif'
        import tifffile as tif
        import scipy.misc
        
        randomstorm = np.zeros((batch_size,256,256))
        
        for i_image in range(0, batch_size):
            # Read one frame per batch entry and scale it by its maximum.
            myframe = tif.imread(mytiffile_name, key=i_image)
            myframe = myframe/np.max(myframe)
            myframe = data.preprocess(myframe)
            # Resize to 256x256.
            
            myframe = scipy.misc.imresize(myframe, size = (256, 256), interp='bilinear', mode='F')

예제 #23

0

파일 보기

    parser.add_argument('--num-workers', type=int, default=0, metavar='W',
                        help='How many subprocesses to use for data loading (default: 0)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='Number of epochs to train (default: 100)')
    parser.add_argument('--patience', type=int, default=10, metavar='P',
                        help='Number of epochs with no improvement after which training will be stopped (default: 10)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='Learning rate (default: 0.0001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='Random seed (default: 1)')
    parser.add_argument('--checkpoint', type=str, default='model.pt', metavar='M',
                        help='checkpoint file name (default: model.pt)')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    # Data Initialization and Loading
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    preprocess(args.data)
    train_loader, valid_loader = get_train_loaders(
        args.data, device, args.batch_size, args.num_workers, args.class_count)

    # Neural Network and Optimizer
    model = TrafficSignNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Training and Validation
    fit(args.epochs, model, criterion, optimizer,
        train_loader, valid_loader, args.patience, args.checkpoint)

예제 #24

0

파일 보기

파일: model.py 프로젝트: Aneesh1212/NLP2019

def validate_dataset(csv_path):
    """Load a CSV and convert it to a validation ``(X, Y)`` pair.

    The file is read with ``read_csv``, run through ``preprocess`` and
    converted with ``convert_to_input``.
    """
    features, targets = convert_to_input(preprocess(read_csv(csv_path)))
    return features, targets

예제 #25

0

파일 보기

def main():
    """Run the pretrained movement network on one audio file.

    The prediction is de-normalized with the checkpoint's keypoint
    statistics, reshaped to ``(frames, -1, 3)``, plotted to
    ``args.plot_path`` and pickled to ``args.output_path``.
    """
    # Parser
    parser = parse()
    parser.add_argument('--inference_audio', type=str,
                        default='inference.wav',
                        help='the path of input wav file',
                        required=True)
    parser.add_argument('--plot_path', type=str,
                        default='inference.mp4',
                        help='plot skeleton and add audio')
    parser.add_argument('--output_path', type=str,
                        default='inference.pkl',
                        help='save skeletal data')
    args = parser.parse_args()

    # Device
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = 'PCI_BUS_ID'
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Load pretrain model
    download_data = Download()
    download_data.pretrain_model()
    checkpoint = torch.load(download_data.pretrain_model_dst,
                            map_location=device)
    keypoints_mean = checkpoint['keypoints_mean']
    keypoints_std = checkpoint['keypoints_std']
    aud_mean = checkpoint['aud_mean']
    aud_std = checkpoint['aud_std']

    # Audio pre-processing
    aud = preprocess(args.inference_audio, aud_mean, aud_std)

    # Model
    movement_net = MovementNet(
        args.d_input, args.d_output_body, args.d_output_rh, args.d_model,
        args.n_block, args.n_unet, args.n_attn, args.n_head, args.max_len,
        args.dropout, args.pre_lnorm, args.attn_type).to(device)
    movement_net.load_state_dict(
        checkpoint['model_state_dict']['movement_net'])
    movement_net.eval()

    with torch.no_grad():
        print('inference...')
        # Add a leading batch axis to the audio features and to the length.
        X_test = torch.tensor(aud, dtype=torch.float32).to(device).unsqueeze(0)
        lengths = torch.tensor(X_test.size(1)).to(device).unsqueeze(0)

        full_output = movement_net.forward(X_test, lengths)
        pred = full_output.squeeze(0).data.cpu().numpy()

        # Transform keypoints to world coordinate
        pred = pred * keypoints_std + keypoints_mean
        pred = np.reshape(pred, [len(pred), -1, 3])

    plot(args.inference_audio, args.plot_path, pred)
    with open(args.output_path, 'wb') as out_file:
        pickle.dump(pred, out_file)

예제 #26

0

파일 보기

import sys
import config as cfg
import data

## Inputs:
# PathToData: Training Data
# Num: Number of training samples
# StartNum: Start processing from this sample number
## Outputs:
# voxels_preprocessed.vtu

# Conversion settings from the config module; sample numbering starts one
# past the configured start offset.
dataPath = cfg.Data_path_ps
num = cfg.num_simulations_ps
startNum = cfg.startNum_simulations_ps + 1

# Preprocess each sample in turn and count the ones reported as successful.
valid = 0
for i in range(startNum, startNum + num):
    print("{}/{}".format(i, num))
    if not data.preprocess(dataPath, i):
        continue
    valid += 1

print("Converted {:d} samples.".format(valid))

예제 #27

0

파일 보기

def preprocess_step(data_path, preprocess_cache):
    """Run ``data.preprocess`` on ``data_path`` and return its result.

    The import is kept inside the function, as in the original, so ``data``
    is only loaded when the step actually runs.
    """
    from data import preprocess
    return preprocess(data_path, preprocess_cache)

예제 #28

0

파일 보기

def cascadeMarkovMapReduce(filenames, debug = False, maxIterations = -1, maxTime = 0):
    """Run a map -> reduce -> markov -> sample feedback loop across 4 processes.

    The calling process forks three children and wires them together with
    ``multiprocessing.Queue`` objects:

    * parent   : maps each input line with ``mapper.map`` and puts the result
                 on ``redq``; afterwards it keeps mapping generated samples
                 taken from ``sampleq``.
    * reducer  : reads ``redq``, applies ``reducer.onlineReduce`` to every
                 mapped item and forwards the list on ``markovq``.
    * markov   : reads ``markovq``, updates an n-gram model, generates one
                 sample per word and forwards ``(samples, scores)`` on
                 ``selectq``.
    * selector : reads ``selectq``, scores and sorts the samples and puts a
                 random slice of them on ``sampleq``.

    ``None`` is used as the end-of-stream sentinel on every queue.

    :param filenames: iterable of file names handed to ``data.extractData``
    :param debug: when true, print progress information
    :param maxIterations: accepted for interface compatibility; not used by
        this function
    :param maxTime: elapsed-seconds threshold (measured from just before the
        first fork) after which the sampling loop stops
    :return: None
    """
    procs = listChunks(range(psutil.cpu_count()), 4)
    if debug:
        print("System of 4 processes with queues. map->reduce->markov->sample, running for {} seconds".format(maxTime))
    # Initial setup: read every file, preprocess each line (dropping lines for
    # which data.preprocess returns None) and flatten the split pieces into
    # space-joined strings.
    lines = [item for sublist in [list(data.extractData(fn)) for fn in filenames] for item in sublist]
    prep = [l for l in [data.preprocess(d) for d in lines] if l is not None]
    datalines = [" ".join(item) for sublist in [list(data.splitify(line)) for line in prep] for item in sublist]
    initialSize = len(datalines)
    dataq = multiprocessing.Queue()
    markovq = multiprocessing.Queue()
    selectq = multiprocessing.Queue()
    redq = multiprocessing.Queue()
    sampleq = multiprocessing.Queue()
    for d in datalines:
        dataq.put(d)

    initialT = time.time()
    redProc = os.fork()
    stopCondition = False
    if redProc == 0:
        # --- reducer child: redq -> markovq ---
        rp = psutil.Process(os.getpid())
        rp.cpu_affinity(procs[0])
        red = myReducer.reducer()
        for toProcess in iter(redq.get, None):
            val = [red.onlineReduce(m) for m in toProcess]
            markovq.put(val)
        markovq.put(None)
        # Short pause before the hard exit; presumably gives the queue's
        # background feeder time to flush the sentinel.
        time.sleep(0.3)
        os._exit(0)
    else:
        markovProc = os.fork()
        if(markovProc == 0):
            # --- markov child: markovq -> selectq ---
            mp = psutil.Process(os.getpid())
            mp.cpu_affinity(procs[1])
            mod = markov.markovNGramModel()
            for toModel in iter(markovq.get, None):
                # toModel is iterated as (word, score) pairs.
                for ng in markov.nGrams([w for w,_ in toModel]):
                    mod.update(ng)
                scores = {word : score for word,score in toModel}
                samples = [mod.sampleGen(w,) for w,_ in toModel]
                selectq.put((samples,scores))
            selectq.put(None)
            time.sleep(0.2)
            os._exit(0)
        else:
            selectProc = os.fork()
            if(selectProc == 0):
                # --- selector child: selectq -> sampleq ---
                # NOTE(review): `r` is never used below; kept in case the
                # constructor has side effects.
                r = myReducer.reducer()
                for toScore in iter(selectq.get, None):
                    samples = toScore[0]
                    scores = toScore[1]
                    if not samples:
                        # Nothing to choose from. random.choice on an empty
                        # range would raise IndexError and kill this child,
                        # so the end-of-stream sentinel would never be sent.
                        continue
                    sampleScores = []
                    # NOTE(review): the outer loop variable is unused, so every
                    # sample receives the same total. Left as-is because the
                    # intended per-sample scoring is not evident from here.
                    for s in [w for w in samples]:
                        total = 0
                        for w in samples:
                            if w in scores:
                                total += scores[w]
                        sampleScores.append(total)
                    scoredSamples = sorted(zip(samples,sampleScores), key=lambda t: t[1])
                    # Random slice: the first `num` entries (coin == 1) or all
                    # but the last `num` entries (coin == -1).
                    coin = random.choice([1,-1])
                    num = random.choice(range(len(samples)))
                    for winner,score in scoredSamples[:coin*num]:
                        sampleq.put(winner)
                sampleq.put(None)
                time.sleep(0.1)
                os._exit(0)
            else:
                # --- parent: dataq/sampleq -> redq ---
                count = 0
                t = 0
                # Phase 1: push every real input line through the mapper.
                while(count < initialSize):
                    count += 1
                    toProcess = dataq.get()
                    maps = [item for item in mapper.map(toProcess)]
                    redq.put(maps)
                    t = time.time() - initialT
                    if(toProcess is None):
                        stopCondition = True
                if(debug):
                    print("{} examples of real data processed in {} seconds".format(count, t))
                    tick = 0
                # Phase 2: feed generated samples back into the pipeline until
                # the time budget is exhausted.
                # NOTE(review): if initialSize is 0, toProcess is unbound here.
                while(not stopCondition):
                    # Refresh the elapsed time on every pass. It used to be
                    # updated only in debug mode, so without debug the
                    # maxTime check below compared a stale value and the loop
                    # could run forever.
                    t = time.time() - initialT
                    if(debug):
                        count += 1
                        if(tick < t // 1):
                            tick = t // 1
                            print("Sample at {} seconds: {}".format(t, toProcess))
                    if(toProcess is None):
                        stopCondition = True
                    if(t > maxTime):
                        stopCondition = True
                    toProcess = sampleq.get()
                    maps = [item for item in mapper.map(toProcess)]
                    redq.put(maps)
                # Sentinel: cascades through reducer -> markov -> selector.
                redq.put(None)
                if debug:
                    print("Last Sample: {}".format(toProcess))
                    print("{} examples used, {} samples generated".format(initialSize, count))
                # NOTE(review): waits for a single child only; the other two
                # are not reaped here.
                os.wait()
    return

예제 #29

0

파일 보기

파일: model.py 프로젝트: jchen0529/MSiA423_webapp

	
	:param hour: input pandas dataframe
	
	"""

	#split data into test and train
	datatrain = hour[hour["train"] == 1]
	datatest = hour[hour["train"]!=1]
	logging.info('Split data into test and train.')

	#log transform reponse variable "cnt" - bike count
	y = datatrain["cnt"]
	ylabelslog = np.log1p(y)
	X=datatrain.drop(["cnt", "train"], 1)
	logging.info('Applied log transformation to response variable bike count.')

	#train random forest model
	rfmodel = RandomForestRegressor(n_estimators=100)
	rfmodel.fit(X, ylabelslog)
	logging.info('Trained a random forest model.')

	#create pickle file
	model_name = 'rf.pkl'
	model_pkl = open(model_name, 'wb')
	pickle.dump(rfmodel, model_pkl)
	model_pkl.close()
	logging.info('Saved model in a pkl file.')

# Script entry point: preprocess the input via data.preprocess("data", "hour.csv")
# and hand the result straight to rfmodel.
if __name__ == "__main__":
	rfmodel(data.preprocess("data","hour.csv"))

예제 #30

0

파일 보기

파일: train.py 프로젝트: nyijian/SofaSofa_competition

import tensorflow as tf
import numpy as np
import data
import network
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Training configuration.
rate = 0.01          # learning rate passed to the Adam optimizer below
batch_size = 1000    # NOTE(review): not referenced in the visible part of this script
train_step = 10000   # number of iterations of the training loop
filepath = "./data/train.csv"

# Load the raw training data and split it into features and labels.
data_load = data.load_data(filepath)
features, labels = data.preprocess(data_load)
# Reshape the labels into a single column so they match the [None, 1] placeholder.
labels = np.reshape(labels, (-1, 1))
# features_train,features_validate,labels_train,labels_validate = train_test_split(features,labels,test_size=0.3)

# Graph inputs: 57 float features per row, and one float target per row.
x = tf.placeholder(tf.float32, [None, 57], name="x")
y = tf.placeholder(tf.float32, [None, 1])

# Model output, mean-absolute-error loss, and the Adam update op minimizing it.
pred = network.network(x)
loss = tf.reduce_mean(tf.abs(y - pred))
train_opration = tf.train.AdamOptimizer(rate).minimize(loss)
# Saver object for the graph variables; its use is outside this span.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(train_step):
        features_train, labels_train = shuffle(features, labels)
        sess.run(train_opration,
                 feed_dict={