예제 #1
0
class PositionAdapter:
    """Apply a position matrix to the bone channels of the controller's owner.

    The adapter looks up the finger-tip, arm and hand channels on the object
    owning the current game-engine controller and forwards locations and a
    rotation to a ``Transformer``.
    """

    # Fixed arm length subtracted from the arm row's second coordinate.
    # TODO: query the arm length instead of using a fixed value.
    _ARM_LENGTH = 2.41533

    def __init__(self,
                 tip_names=(
                     "thumbTip_IK",
                     "indexTip_IK",
                     "middleTip_IK",
                     "ringTip_IK",
                     "littleTip_IK",
                 ),
                 arm_name="arm",
                 hand_name="hand"):
        """Resolve the channels that ``translate`` will drive.

        Args:
            tip_names: iterable of channel names for the finger tips. The
                default is an immutable tuple so it cannot be shared and
                modified between instances.
            arm_name: channel name of the arm; stored after the finger tips.
            hand_name: channel name of the hand, which receives the rotation.
        """
        ctrl = bge.logic.getCurrentController()
        self._ob = ctrl.owner
        # Finger-tip channels first, then the arm channel as the last entry.
        self._bone_channels = [self._ob.channels[tip] for tip in tip_names]
        self._bone_channels.append(self._ob.channels[arm_name])
        self._hand_name = hand_name
        self._hand_channel = self._ob.channels[hand_name]
        self._transformer = Transformer()

    def translate(self, matrix):
        """Move the finger/arm channels and rotate the hand channel.

        Args:
            matrix: indexable sequence whose rows before the last are
                locations (row ``i`` goes to bone channel ``i``) and whose
                last row is the rotation for the hand channel. Row 5 is
                modified in place: ``_ARM_LENGTH`` is subtracted from its
                element 1.
        """
        # Modify the arm position by the arm length (mutates the caller's row).
        matrix[5][1] -= self._ARM_LENGTH

        # Every row but the last is a location; the last one is the rotation.
        *locations, rotation = matrix

        # translate fingers and arm
        for index, loc in enumerate(locations):
            self._transformer.translate(self._bone_channels[index], self._ob,
                                        loc)

        # rotate hand
        self._transformer.rotate(self._hand_channel, rotation)

        # update
        self._ob.update()
예제 #2
0
 def run_transform(self):
     """Run the basic FFT demonstration for the name read from ``self.box``.

     A ``FileNotFoundError`` raised while building or running the
     transformer is printed instead of propagated.
     """
     image_name = self.box.get()
     try:
         fft_worker = Transformer(image_name)
         fft_worker.run()
     except FileNotFoundError as missing:
         print(missing)
     else:
         # Drop the local references before forcing a collection pass.
         del fft_worker, image_name
         gc.collect()
예제 #3
0
 def run_show_primitives():
     """Plot the primitive shapes together with their shifted FT images."""
     image_files = ('round.jpg', 'square.png', 'triangle.png')
     # Build every transformer up front, then process them in order.
     transformers = [Transformer(image_file) for image_file in image_files]
     for primitive in transformers:
         primitive.plot()
         primitive.transform().shift()
         primitive.plot_fft('Shifted FT of')
     Transformer.show_all()

     # Empty the list, drop it and force a collection pass.
     transformers.clear()
     del transformers
     gc.collect()
예제 #4
0
    def run_filter(self):
        """Demonstrate the low-pass and high-pass filters on one image.

        The image name comes from ``self.box`` and the filter fraction from
        ``self.fracbox``. Any exception raised along the way is printed
        instead of propagated.
        """
        image_name = self.box.get()
        try:
            cutoff = float(self.fracbox.get())

            # Low-pass run: show the source image first, then filter it.
            low_pass = fl.Filter(image_name, cutoff)
            low_pass.plot(image_name)
            low_pass.low_pass_filter()

            # High-pass run on a fresh filter built from the same inputs.
            high_pass = fl.Filter(image_name, cutoff)
            high_pass.high_pass_filter()

            Transformer.show_all()

            # Drop the local references before forcing a collection pass.
            del low_pass, high_pass, image_name
            gc.collect()
        except Exception as failure:
            print(failure)
예제 #5
0
    def fit(self, Xmask, y):
        """Build the transformer + naive Bayes pipeline and fit it.

        The prepared feature set is loaded, its matrix and column names are
        stored on the instance, and a pipeline of ``Transformer`` followed by
        a multinomial naive Bayes estimator is fitted on ``Xmask`` and ``y``.

        Returns:
            self, with the fitted pipeline available as ``self.rd``.
        """
        featureset = prepare.Prepare_0(
            model=10,
            preproc=1,
            min_df=1,
            use_svd=False,
            tfidf=2,
            stemmer=0,
        )
        X_all_df, _, BP, params = featureset.load_transform(update=False)
        column_names = list(X_all_df.columns)
        X_all = np.asarray(X_all_df)
        self.X_all, self.names = X_all, column_names

        # Candidate estimators; only the multinomial one is plugged in below.
        candidates = (
            GaussianNB(),
            MultinomialNB(alpha=0.8),
            BernoulliNB(alpha=1, binarize=0.01),
        )
        estimator = candidates[1]

        steps = [
            ("trans", Transformer(names=self.names, X_all=X_all, BP=BP)),
            ("est", estimator),
        ]
        self.rd = Pipeline(steps)

        targets = np.asarray(y)
        self.rd.fit(Xmask, targets)
        return self
예제 #6
0
  def fit(self, Xmask, y):
    """Assemble the pipeline from the instance settings and fit it.

    The steps are, in order: the ``Transformer``, an optional
    ``StandardScaler`` (when ``self.use_scaler`` is positive), whatever
    ``self._pipeline_append`` adds, and the estimator returned by
    ``self._get_clf``.

    Returns:
        self, with the fitted pipeline available as ``self.rd``.
    """
    featureset = self._get_featureset()
    X_all_df, _, BP, params = featureset.load_transform(update=False)
    column_names = list(X_all_df.columns)
    X_all = np.asarray(X_all_df)
    self.X_all, self.names = X_all, column_names

    logger.debug(
        'Fit: use_stats=%s,use_table=%s,predict_bp=%s,use_scaler=%s',
        self.use_stats, self.use_table, self.predict_bp, self.use_scaler)
    logger.debug(
        'Fit: bst_bgram=%s,bst_minc=%s,bst_title=%s,bst_body=%s,bst_url=%s',
        self.bst_bgram, self.bst_minc, self.bst_title, self.bst_body,
        self.bst_url)

    transformer_options = dict(
        names=self.names,
        use_best=self.use_best,
        use_bp=self.use_bp,
        use_stats=self.use_stats,
        use_table=self.use_table,
        bst_bgram=self.bst_bgram,
        bst_minc=self.bst_minc,
        bst_title=self.bst_title,
        bst_body=self.bst_body,
        bst_url=self.bst_url,
        predict_bp=self.predict_bp,
        bp_clfs=self.bp_clfs,
        X_all=X_all,
        BP=BP,
    )
    steps = [("trans", Transformer(**transformer_options))]

    if self.use_scaler > 0:
        # Centering is only enabled for scaler settings above 1.
        center = self.use_scaler > 1
        steps.append(("scaler", StandardScaler(with_mean=center)))

    # Let the hook add its own steps before the final estimator.
    self._pipeline_append(steps)
    estimator = self._get_clf(self.clf)
    steps.append(("est", estimator))

    self.rd = Pipeline(steps)
    logger.debug('Pipline: %s',
                 [(k, v.__class__.__name__) for k, v in steps])
    logger.debug("Pipeline.estimator=%s", estimator)

    targets = np.asarray(y)
    self.rd.fit(Xmask, targets)
    return self
예제 #7
0
파일: launch.py 프로젝트: hkrd/weather-etl
def get_hottest_day_formatted():
    """Print the hottest day reported by the Transformer as a table.

    The three columns of the result are renamed to ``Date``,
    ``Temperature`` and ``Region``, the date is rendered as
    day-month-year, and the table is printed without its index.
    """

    def _as_day_month_year(value):
        """Format one date cell as ``DD-MM-YYYY``."""
        return "{:%d-%m-%Y}".format(pd.to_datetime(value))

    hottest_day = Transformer().find_hottest_day()
    # Replace the source column names with more readable ones.
    hottest_day.columns = ['Date', 'Temperature', 'Region']

    table = hottest_day.to_string(
        formatters={"Date": _as_day_month_year},
        index=False,
    )
    print(table)
예제 #8
0
    def __init__(self,
                 tip_names=(
                     "thumbTip_IK",
                     "indexTip_IK",
                     "middleTip_IK",
                     "ringTip_IK",
                     "littleTip_IK",
                 ),
                 arm_name="arm",
                 hand_name="hand"):
        """Resolve the finger, arm and hand channels of the controller owner.

        Args:
            tip_names: iterable of channel names for the finger tips. The
                default is an immutable tuple so it cannot be shared and
                modified between instances.
            arm_name: channel name of the arm.
            hand_name: channel name of the hand.
        """
        ctrl = bge.logic.getCurrentController()
        self._ob = ctrl.owner
        self._finger_channels = [self._ob.channels[tip] for tip in tip_names]
        self._arm_channels = self._ob.channels[arm_name]
        self._hand_name = hand_name
        self._hand_channel = self._ob.channels[hand_name]
        self._transformer = Transformer()
예제 #9
0
    def fit(self, Xmask, y):
        """Build the transformer + logistic regression pipeline and fit it.

        The prepared feature set is loaded, its matrix and column names are
        stored on the instance, and a pipeline of ``Transformer`` followed by
        an L2-penalised logistic regression is fitted on ``Xmask`` and ``y``.

        Args:
            Xmask: first argument forwarded to the pipeline's ``fit``.
            y: targets; converted with ``np.asarray`` before fitting.

        Returns:
            self, with the fitted pipeline available as ``self.rd``.
        """
        pr = prepare.Prepare_0(model=14,
                               n_components=512,
                               preproc=1,
                               min_df=1,
                               use_svd=True,
                               tfidf=2,
                               stemmer=0)
        (X_all_df, _, BP, _params) = pr.load_transform(update=False)
        names = list(X_all_df.columns)
        X_all = np.asarray(X_all_df)
        self.X_all, self.names = X_all, names

        clf1 = lm.LogisticRegression(penalty='l2',
                                     dual=True,
                                     tol=0.00001,
                                     C=1,
                                     fit_intercept=True,
                                     intercept_scaling=1.0,
                                     class_weight=None,
                                     random_state=random_state)

        def _sigmoid_proba(model, X):
            """Map ``model.predict(X)`` to a two-column probability array."""
            # One pre-formatted string prints the same text whether ``print``
            # is a statement (Python 2) or a function (Python 3).
            print('alpha_: %s' % (model.alpha_,))
            scores = model.predict(X)
            # Logistic squashing centred on 0.5.
            scores = 1. / (1 + np.exp(-(scores - 0.5)))
            return np.vstack((1 - scores, scores)).T

        class LassoCV_proba(lm.LassoCV):
            """LassoCV exposing a ``predict_proba`` built from ``predict``."""

            def predict_proba(self, X):
                return _sigmoid_proba(self, X)

        class RidgeCV_proba(lm.RidgeCV):
            """RidgeCV exposing a ``predict_proba`` built from ``predict``."""

            def predict_proba(self, X):
                return _sigmoid_proba(self, X)

        # Alternative estimators kept for experimentation; only clf1 is used.
        clf2 = RidgeCV_proba(alphas=np.linspace(0, 10), cv=4)
        clf3 = LassoCV_proba(alphas=None, cv=4)
        clf4 = svm.SVR(C=3, kernel='linear')

        clf = clf1

        self.rd = Pipeline([
            ("trans", Transformer(names=self.names, X_all=X_all, BP=BP)),
            ("est", clf)
        ])

        self.rd.fit(Xmask, np.asarray(y))
        return self
예제 #10
0
def main():
    """Read audio from the capture device, transform it and play it back.

    Runs until the process is interrupted.
    """

    def _open_pcm(direction, period_size):
        """Open a PCM device and apply the shared stream settings."""
        pcm = alsaaudio.PCM(direction)
        pcm.setchannels(CHANNELS)
        pcm.setrate(RATE)
        pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        pcm.setperiodsize(period_size)
        return pcm

    capture = _open_pcm(alsaaudio.PCM_CAPTURE, CHUNK)
    # The playback period is 1000 larger than the capture period.
    playback = _open_pcm(alsaaudio.PCM_PLAYBACK, CHUNK + 1000)
    transformer = Transformer(pitch.pitch_up)

    while True:
        _, frames = capture.read()
        if not frames:
            # Nothing was read this round; poll again.
            continue
        playback.write(transformer.transform(frames))
예제 #11
0
def main():
    """Stream captured audio through the pitch transformer to playback.

    Runs until the process is interrupted.
    """
    # (device mode, period size) for the capture and the playback side.
    device_specs = (
        (alsaaudio.PCM_CAPTURE, CHUNK),
        (alsaaudio.PCM_PLAYBACK, CHUNK + 1000),
    )
    devices = []
    for mode, period_size in device_specs:
        device = alsaaudio.PCM(mode)
        device.setchannels(CHANNELS)
        device.setrate(RATE)
        device.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        device.setperiodsize(period_size)
        devices.append(device)
    source, sink = devices

    transformer = Transformer(pitch.pitch_up)

    while True:
        _length, chunk = source.read()
        if chunk:
            sink.write(transformer.transform(chunk))
예제 #12
0
파일: actor.py 프로젝트: lzw-pku/AutoML
    def __init__(self, args):
        """Create the estimator and grammar transformer from ``args``.

        Args:
            args: object whose attributes supply the estimator settings
                (``emb_dim``, ``n_hidden``, ``bi``, ``n_layer``, ``dropout``,
                ``lr``, ``decay``, ``lr_p``, ``clip``, ``batch``,
                ``epoch_num``, ``cuda``, ``path``).
        """
        estimator_options = dict(
            emb_dim=args.emb_dim,
            n_hidden=args.n_hidden,
            bidirectional=args.bi,
            n_layer=args.n_layer,
            dropout=args.dropout,
            lr=args.lr,
            decay=args.decay,
            lr_p=args.lr_p,
            clip=args.clip,
            batch_size=args.batch,
            epoch_num=args.epoch_num,
            cuda=args.cuda,
            path=args.path,
        )
        self.estimator = Estimator(**estimator_options)

        grammar = prolog_grammar.GRAMMAR_DICTIONARY
        root_rule = prolog_grammar.ROOT_RULE
        self.transformer = Transformer(grammar, root_rule)

        # Both histories start out empty.
        self.performances = []
        self.actions = []
        self.path = args.path
예제 #13
0
 def run_timing():
     """Time the FFT on a fixed set of images and log the results.

     Writes ``timing-fft.txt`` with a header line followed by one
     ``<pixel count> <seconds>`` line per image. If any image file is
     missing, the error is printed and the program exits.
     """
     # (file name, pixel count) for each benchmark image.
     benchmark_images = (
         ('6A(142).BMP', 76800),  # 320x240
         ('vase.jpg', 128000),  # 320x400
         ('DUSS.BMP', 317856),  # 672x473
         ('round.jpg', 640000),  # 800x800
         ('jankaluza_dew_drop.jpg', 9838080),  # 3840x2562
         ('vovalente_fire.jpg', 17185645),  # 5077x3385
     )
     try:
         # Keep the transformers only in this list so that clearing it below
         # really drops the last references to them.
         transformers = [Transformer(file_name)
                         for file_name, _ in benchmark_images]
     except FileNotFoundError as er:
         print(er)
         sys.exit()
     with open('timing-fft.txt', 'w') as out:
         out.write('# number TIME\n')
     for (_, size), transformer in zip(benchmark_images, transformers):
         # perf_counter is monotonic and meant for measuring intervals.
         start = time.perf_counter()
         transformer.transform()
         end = time.perf_counter()
         # Append after every run so finished timings survive a later failure.
         with open('timing-fft.txt', 'a') as log:
             log.write(str(size) + ' ' + str(end - start) + '\n')
     # The loop variable still points at the last transformer; drop it too.
     del transformer
     transformers.clear()
     gc.collect()
     print('Timing completed successfully')
예제 #14
0
class TestTransformer(TestCase):
    """Checks the hottest-day result produced by ``Transformer``."""

    def setUp(self):
        """Create a fresh transformer and locate the project directory."""
        self.transformer = Transformer()
        self.project_dir = os.path.abspath(__file__ + "/../../")

    def tearDown(self):
        """Remove the ``*.parquet.gzip`` files found under ``resources``."""
        pattern = self.project_dir + "/resources/*.parquet.gzip"
        for leftover in glob.glob(pattern):
            os.remove(leftover)

    def test___init__(self):
        """The hottest day is a single row with the expected values."""
        result = self.transformer.find_hottest_day()
        print(result)
        self.assertEqual(1, len(result.index))

        observed_date = str(result['ObservationDate'].iloc[0])
        temperature = float(result['ScreenTemperature'].iloc[0])
        region = str(result['Region'].iloc[0])

        self.assertEqual('2016-03-17T00:00:00', observed_date)
        self.assertEqual(15.8, temperature)
        self.assertEqual('Highland & Eilean Siar', region)
예제 #15
0
class PositionAdapter:
    """Apply a position matrix to the bone channels of the controller's owner.

    The arm is moved and rotated first, the owner is updated, and only then
    are the finger channels moved, followed by a second update.
    """

    def __init__(self,
                 tip_names=(
                     "thumbTip_IK",
                     "indexTip_IK",
                     "middleTip_IK",
                     "ringTip_IK",
                     "littleTip_IK",
                 ),
                 arm_name="arm",
                 hand_name="hand"):
        """Resolve the finger, arm and hand channels of the controller owner.

        Args:
            tip_names: iterable of channel names for the finger tips. The
                default is an immutable tuple so it cannot be shared and
                modified between instances.
            arm_name: channel name of the arm.
            hand_name: channel name of the hand.
        """
        ctrl = bge.logic.getCurrentController()
        self._ob = ctrl.owner
        self._finger_channels = [self._ob.channels[tip] for tip in tip_names]
        self._arm_channels = self._ob.channels[arm_name]
        self._hand_name = hand_name
        self._hand_channel = self._ob.channels[hand_name]
        self._transformer = Transformer()

    def translate(self, matrix):
        """Move and rotate the arm, then move the fingers.

        Args:
            matrix: indexable sequence; rows up to the third-to-last are
                finger locations (row ``i`` goes to finger channel ``i``),
                the second-to-last row is the arm location and the last row
                is the rotation.
        """
        # TODO: query the arm length instead of using a fixed value
        print("update 0", self._finger_channels[0].location)

        # translate arm
        self._transformer.translate(self._arm_channels, self._ob, matrix[-2])

        # Rotate: the rotation is applied to the arm channel here.
        # NOTE(review): the hand channel is resolved in __init__ but not used
        # in this method - confirm that rotating the arm is intended.
        rotation = matrix[-1]
        self._transformer.rotate(self._arm_channels, rotation)

        # update
        self._ob.update()
        print('f1', self._finger_channels[0].location)
        print('f1', self._finger_channels[0].rotation_quaternion)

        # translate fingers (every row except the arm and rotation rows)
        for i in range(len(matrix) - 2):
            loc = matrix[i]
            self._transformer.translate(self._finger_channels[i], self._ob,
                                        loc)

        # update
        self._ob.update()
        print('f2', self._finger_channels[0].location)
예제 #16
0
    def _transform_dataset_test(self, isLocal: bool, dataset_name: str, output_format: str):
        """Run the transformation for one dataset and check it returns data.

        Args:
            isLocal: forwarded to ``get_paths``.
            dataset_name: one of ``'country'``, ``'airport'``, ``'us_state'``
                or ``'immigration'``.
            output_format: forwarded to ``get_paths``.

        Raises:
            ValueError: if ``dataset_name`` is not one of the supported names.
        """
        paths = list(get_paths(isLocal, dataset_name, output_format))
        # The first path goes to the Transformer; the rest are per-dataset
        # input/output paths.
        transform = Transformer(self._spark, paths[0])
        dataset_paths = paths[1:]

        if dataset_name == 'country':
            temperature_input_path, country_dict_input_path, output_path = dataset_paths
            data = transform.transform_country(country_dict_input_path, temperature_input_path, output_path)
        elif dataset_name == 'airport':
            input_path, output_path = dataset_paths
            data = transform.transform_airports(input_path, output_path)
        elif dataset_name == 'us_state':
            demographics_input_path, us_state_dict_input_path, output_path = dataset_paths
            data = transform.transform_us_state(us_state_dict_input_path, demographics_input_path, output_path)
        elif dataset_name == 'immigration':
            input_path, date_output_path, img_output_path = dataset_paths
            data = transform.transform_immigration(input_path, img_output_path, date_output_path)
        else:
            # Fail with a clear message instead of an unbound ``data`` below.
            raise ValueError('Unsupported dataset name: %r' % (dataset_name,))

        self.assertIsNotNone(data)
예제 #17
0
파일: main.py 프로젝트: cosmicBboy/self
        params['url'] = data['url']

        load = Loader()
        load.setParams(params)
        data = load.loadHtml().data

        # ------------------ #
        # Transform the data #
        # ------------------ #

        # setting parameters
        jsonFp = './states.json'
        colName = 'State:'  # column name to filter data

        # instantiate transformer
        transform = Transformer(data)
        transform.setTransformDictionary(jsonFp)

        # get long values of the states
        statesDict = transform.flipTransformDictionary()
        states = statesDict.keys()

        # filter the data
        transform.filterData(colName, states)
        statesAbbr = [statesDict[state] for state in transform.data[colName]]
        transform.addCol(1, 'StateAbbr', statesAbbr)

        # rename column values
        transform.renameCol(0, 'State')

        # set index
def train(net, train_set, valid_set, train_params, logger=None, prefix=''):
    """Train ``net`` with Nesterov-momentum SGD on a mean squared error loss.

    Training batches are produced by a background ``Transformer`` that is fed
    the training set and a bounded queue; after every epoch the validation
    loss is computed and logged. The per-epoch curves are saved to
    ``training.npy`` under ``prefix``.

    Args:
        net: mapping whose ``'output'`` entry is the network output layer.
        train_set: ``(x_train, y_train)`` pair handed to the ``Transformer``.
        valid_set: ``(x_valid, y_valid)`` pair evaluated after each epoch.
        train_params: mapping with the keys ``'batch_size'``,
            ``'image_size'``, ``'momentum'``, ``'epochs'`` and
            ``'lr_schedule'``. The schedule is indexed with ``0`` for the
            initial rate and looked up by epoch number afterwards.
        logger: logger used for progress output; defaults to the module
            logger when omitted.
        prefix: directory in which ``training.npy`` is written.
    """
    import logging  # local import: only needed for the logger fallback

    if logger is None:
        # The default argument used to crash on the first logger.info call.
        logger = logging.getLogger(__name__)

    # unpack arguments
    x_train, y_train = train_set
    x_valid, y_valid = valid_set
    # Insert a singleton axis after the batch axis of the validation images;
    # the train set is converted inside Transformer.
    x_valid = x_valid[:, np.newaxis, ...]

    BATCH_SIZE = train_params['batch_size']
    IMAGE_SIZE = train_params['image_size']
    MOMENTUM = train_params['momentum']
    MAX_EPOCH = train_params['epochs']
    LEARNING_RATE_SCHEDULE = train_params['lr_schedule']
    output = net['output']

    print("Starting dataset loader...")
    queue = Queue(5)
    transform = Transformer(x_train, y_train, queue, batch_size=BATCH_SIZE)
    transform.start()

    # allocate symbolic variables for theano graph computations
    X_batch = T.tensor4('x')
    y_batch = T.fmatrix('y')

    # allocate shared variables for images, labels and learning rate
    x_shared = theano.shared(
        np.zeros((BATCH_SIZE, 1, IMAGE_SIZE, IMAGE_SIZE),
                 dtype=theano.config.floatX),
        borrow=True)
    y_shared = theano.shared(
        np.zeros((BATCH_SIZE, 2), dtype=theano.config.floatX),
        borrow=True)

    learning_rate = theano.shared(np.float32(LEARNING_RATE_SCHEDULE[0]))

    out_train = lasagne.layers.get_output(output, X_batch, deterministic=False)
    out_val = lasagne.layers.get_output(output, X_batch, deterministic=True)

    loss_train = T.mean(lasagne.objectives.squared_error(out_train, y_batch))
    loss_val = T.mean(lasagne.objectives.squared_error(out_val, y_batch))

    # collect all model parameters
    all_params = lasagne.layers.get_all_params(output)
    # generate parameter updates for SGD with Nesterov momentum
    updates = lasagne.updates.nesterov_momentum(
        loss_train, all_params, learning_rate, MOMENTUM)

    logger.info("Compiling theano functions...")
    # Both functions read their inputs from the shared variables, which are
    # refilled before every call.
    iter_train = theano.function(
        [],
        [loss_train],
        updates=updates,
        givens={
            X_batch: x_shared,
            y_batch: y_shared,
        },
    )
    iter_valid = theano.function(
        [],
        [loss_val, out_val],
        givens={
            X_batch: x_shared,
            y_batch: y_shared,
        },
    )

    ###################
    # Actual training #
    ###################

    n_val_batches = x_valid.shape[0] // BATCH_SIZE
    # NOTE(review): model selection by AUC is not implemented here, so these
    # two values are only reported with their initial values.
    best_epoch = 0
    best_auc = 0.
    epoch = 0
    # store these values for learning curves plotting
    train_loss = []
    valid_loss = []
    aucs = []

    now = time.time()
    logger.info("| Epoch | Train err | Validation err | ROC AUC | Ratio |  Time  |")
    logger.info("|---------------------------------------------------------------|")

    try:
        while epoch < MAX_EPOCH:
            if epoch in LEARNING_RATE_SCHEDULE:
                learning_rate.set_value(LEARNING_RATE_SCHEDULE[epoch])
            epoch += 1

            # Train on queued batches until the loader sends ``None`` as the
            # images, which ends the epoch.
            x_next, y_next = queue.get()
            losses = []
            while x_next is not None:
                x_shared.set_value(x_next, borrow=True)
                y_shared.set_value(y_next, borrow=True)
                losses.append(iter_train())
                x_next, y_next = queue.get()

            avg_train_loss = np.mean(losses)

            # Validation loss, batch by batch.
            # NOTE(review): the last full validation batch is skipped by the
            # ``- 1`` below - confirm that this is intended.
            losses = []
            for idx in range(n_val_batches - 1):
                batch = slice(idx * BATCH_SIZE, (idx + 1) * BATCH_SIZE)
                x_shared.set_value(x_valid[batch])
                y_shared.set_value(y_valid[batch])
                vloss, _ = iter_valid()
                losses.append(vloss)

            avg_valid_loss = np.mean(losses)

            logger.info("|%6d | %9.6f | %14.6f | %7.5f | %1.3f | %6d |" %
                        (epoch,
                         avg_train_loss,
                         avg_valid_loss,
                         0,
                         avg_valid_loss / avg_train_loss,
                         time.time() - now))
            # keep track of these for future analysis
            train_loss.append(avg_train_loss)
            valid_loss.append(avg_valid_loss)
            # No AUC is computed; keep the row aligned with the loss rows so
            # the three curves still form a rectangular array when saved.
            aucs.append(np.nan)
    except KeyboardInterrupt:
        logger.info("Trainig interrupted on epoch %d" % epoch)
    finally:
        # Always stop the loader, even when training fails.
        transform.terminate()
        transform.join()

    elapsed_time = time.time() - now
    logger.info("The best auc: %.5f obtained on epoch %d.\n The training took %d seconds." %
                (best_auc, best_epoch, elapsed_time))
    # Count each training image once per epoch (labels are not images).
    logger.info(" The average performance was %.1f images/sec" % (
        len(x_train) * float(epoch) / elapsed_time))

    # ``np.float`` was only an alias of the builtin and is gone from NumPy.
    results = np.array([train_loss, valid_loss, aucs], dtype=float)
    np.save(os.path.join(prefix, "training.npy"), results)
예제 #19
0
from transform import Transformer

if __name__ == '__main__':
    # Primitive shapes: plot the FT and the shifted FT of each one.
    for shape_file in ('round.jpg', 'square.png', 'triangle.png'):
        primitive = Transformer(shape_file)
        primitive.transform()
        primitive.plot_fft('FT of')
        primitive.shift()
        primitive.plot_fft('Shifted FT of')
    Transformer.show_all()

    # Complex images: originals first, then the FT of both, then the
    # shifted FT of both.
    vase = Transformer('vase.jpg')
    face = Transformer('face.jpg')
    for picture, caption in ((vase, 'Original vase'), (face, 'Original face')):
        picture.plot(caption)
    for picture in (vase, face):
        picture.transform()
        picture.plot_fft('FT')
    for picture in (vase, face):
        picture.shift()
        picture.plot_fft('Shifted FT')
    Transformer.show_all()
예제 #20
0
파일: filter.py 프로젝트: AntonyBazin/FFT
        super().inverse()
        self.plot('Low pass filtered image',
                  'gray',
                  False)

    def high_pass_filter(self):
        """Apply the high pass filter to the image and plot each stage.

        After the forward transform, the four corner blocks of the data are
        set to zero; each block spans ``fraction`` of the rows and columns.
        The spectrum is plotted before and after shifting, then the inverse
        transform is applied and the filtered image is plotted.
        """
        super().transform()

        row_cut = int(self.r * self.fraction)
        col_cut = int(self.c * self.fraction)
        row_tail = int(self.r * (1 - self.fraction))
        col_tail = int(self.c * (1 - self.fraction))

        top, bottom = slice(0, row_cut), slice(row_tail, self.r)
        left, right = slice(0, col_cut), slice(col_tail, self.c)
        # Zero the corner blocks: both left corners, then both right corners.
        for cols in (left, right):
            for rows in (top, bottom):
                self._data[rows, cols] = 0

        self._plotting = np.abs(self._data)
        self.plot_fft('High pass filtered spectrum')
        self.shift()
        self.plot_fft('High pass filtered spectrum with shift')
        super().inverse()
        self.plot('High pass filtered image', 'gray', False)


if __name__ == '__main__':
    # Demo: run both filters on the same image with the same fraction.
    demo_image = '6A(142).BMP'
    demo_fraction = 0.07

    low_pass_demo = Filter(demo_image, demo_fraction)
    low_pass_demo.plot(demo_image)
    low_pass_demo.low_pass_filter()

    high_pass_demo = Filter(demo_image, demo_fraction)
    high_pass_demo.high_pass_filter()

    Transformer.show_all()
예제 #21
0
# Pick the data source: the freshly queried results in full SOQL query mode,
# otherwise the pre-loaded ``envdata``.
target_dict_list = final_dict_list if full_soql_query_mode else envdata

# Each entry of the list carries its rows under 'records'. The positions are
# fixed: 0 opps, 1 NPV tasks, 2 service orders, 3 cap projects,
# 4 expense builders, 5 quotes, 6 COR forms.
opps = target_dict_list[0]['records']
service_orders = target_dict_list[2]['records']
quotes = target_dict_list[5]['records']
cor_forms = target_dict_list[6]['records']
cap_projects = target_dict_list[3]['records']
expense_builders = target_dict_list[4]['records']
npv_tasks = target_dict_list[1]['records']

print(
    'All data successfully queried. Any errors after this point are due to DATA VALIDATION ONLY.'
)

# Validation chain: each stage receives the output of the previous stage(s).
t = Transformer(opps, service_orders, quotes, cor_forms, cap_projects,
                expense_builders)
valid_opp_to_service_orders = t.validate_opp_to_service_order()
valid_opp_to_quote_or_cor_form = t.validate_opp_to_quote_or_cor_form(
    valid_opp_to_service_orders)
standardized_opp_to_cp_or_eb = t.standardize_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form)
valid_opp_to_cp_or_eb = t.validate_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form, standardized_opp_to_cp_or_eb)
# print(valid_opp_to_cp_or_eb)

## All validation stages passed, applicable NPV tasks can now be closed:
# Outside full SOQL query mode no Loader is created and no tasks are closed.
l = Loader(valid_opp_to_cp_or_eb, npv_tasks) if full_soql_query_mode else None
tasks_closed = l.load_tasks() if full_soql_query_mode else []
if len(tasks_closed) == 0:
    print('0 NPV tasks validated by automation.')
elif len(tasks_closed) > 0:
예제 #22
0
파일: train.py 프로젝트: Keesiu/meta-kaggle
def train(net, train_set, valid_set, train_params, logger=None, prefix=''):
    """Train ``net`` with Nesterov-momentum SGD on a mean squared error loss.

    Training batches are produced by a background ``Transformer`` that is fed
    the training set and a bounded queue; after every epoch the validation
    loss is computed and logged. The per-epoch curves are saved to
    ``training.npy`` under ``prefix``.

    Args:
        net: mapping whose ``'output'`` entry is the network output layer.
        train_set: ``(x_train, y_train)`` pair handed to the ``Transformer``.
        valid_set: ``(x_valid, y_valid)`` pair evaluated after each epoch.
        train_params: mapping with the keys ``'batch_size'``,
            ``'image_size'``, ``'momentum'``, ``'epochs'`` and
            ``'lr_schedule'``. The schedule is indexed with ``0`` for the
            initial rate and looked up by epoch number afterwards.
        logger: logger used for progress output; defaults to the module
            logger when omitted.
        prefix: directory in which ``training.npy`` is written.
    """
    import logging  # local import: only needed for the logger fallback

    if logger is None:
        # The default argument used to crash on the first logger.info call.
        logger = logging.getLogger(__name__)

    # unpack arguments
    x_train, y_train = train_set
    x_valid, y_valid = valid_set
    # Insert a singleton axis after the batch axis of the validation images;
    # the train set is converted inside Transformer.
    x_valid = x_valid[:, np.newaxis, ...]

    BATCH_SIZE = train_params['batch_size']
    IMAGE_SIZE = train_params['image_size']
    MOMENTUM = train_params['momentum']
    MAX_EPOCH = train_params['epochs']
    LEARNING_RATE_SCHEDULE = train_params['lr_schedule']
    output = net['output']

    print("Starting dataset loader...")
    queue = Queue(5)
    transform = Transformer(x_train, y_train, queue, batch_size=BATCH_SIZE)
    transform.start()

    # allocate symbolic variables for theano graph computations
    X_batch = T.tensor4('x')
    y_batch = T.fmatrix('y')

    # allocate shared variables for images, labels and learning rate
    x_shared = theano.shared(np.zeros((BATCH_SIZE, 1, IMAGE_SIZE, IMAGE_SIZE),
                                      dtype=theano.config.floatX),
                             borrow=True)
    y_shared = theano.shared(np.zeros((BATCH_SIZE, 2),
                                      dtype=theano.config.floatX),
                             borrow=True)

    learning_rate = theano.shared(np.float32(LEARNING_RATE_SCHEDULE[0]))

    out_train = lasagne.layers.get_output(output, X_batch, deterministic=False)
    out_val = lasagne.layers.get_output(output, X_batch, deterministic=True)

    loss_train = T.mean(lasagne.objectives.squared_error(out_train, y_batch))
    loss_val = T.mean(lasagne.objectives.squared_error(out_val, y_batch))

    # collect all model parameters
    all_params = lasagne.layers.get_all_params(output)
    # generate parameter updates for SGD with Nesterov momentum
    updates = lasagne.updates.nesterov_momentum(loss_train, all_params,
                                                learning_rate, MOMENTUM)

    logger.info("Compiling theano functions...")
    # Both functions read their inputs from the shared variables, which are
    # refilled before every call.
    shared_inputs = {
        X_batch: x_shared,
        y_batch: y_shared,
    }
    iter_train = theano.function(
        [],
        [loss_train],
        updates=updates,
        givens=shared_inputs,
    )
    iter_valid = theano.function(
        [],
        [loss_val, out_val],
        givens=shared_inputs,
    )

    ###################
    # Actual training #
    ###################

    n_val_batches = x_valid.shape[0] // BATCH_SIZE
    # NOTE(review): model selection by AUC is not implemented here, so these
    # two values are only reported with their initial values.
    best_epoch = 0
    best_auc = 0.
    epoch = 0
    # store these values for learning curves plotting
    train_loss = []
    valid_loss = []
    aucs = []

    now = time.time()
    logger.info(
        "| Epoch | Train err | Validation err | ROC AUC | Ratio |  Time  |")
    logger.info(
        "|---------------------------------------------------------------|")

    try:
        while epoch < MAX_EPOCH:
            if epoch in LEARNING_RATE_SCHEDULE:
                learning_rate.set_value(LEARNING_RATE_SCHEDULE[epoch])
            epoch += 1

            # Train on queued batches until the loader sends ``None`` as the
            # images, which ends the epoch.
            x_next, y_next = queue.get()
            losses = []
            while x_next is not None:
                x_shared.set_value(x_next, borrow=True)
                y_shared.set_value(y_next, borrow=True)
                losses.append(iter_train())
                x_next, y_next = queue.get()

            avg_train_loss = np.mean(losses)

            # Validation loss, batch by batch.
            # NOTE(review): the last full validation batch is skipped by the
            # ``- 1`` below - confirm that this is intended.
            losses = []
            for idx in range(n_val_batches - 1):
                batch = slice(idx * BATCH_SIZE, (idx + 1) * BATCH_SIZE)
                x_shared.set_value(x_valid[batch])
                y_shared.set_value(y_valid[batch])
                vloss, _ = iter_valid()
                losses.append(vloss)

            avg_valid_loss = np.mean(losses)

            logger.info("|%6d | %9.6f | %14.6f | %7.5f | %1.3f | %6d |" %
                        (epoch, avg_train_loss, avg_valid_loss, 0,
                         avg_valid_loss / avg_train_loss, time.time() - now))
            # keep track of these for future analysis
            train_loss.append(avg_train_loss)
            valid_loss.append(avg_valid_loss)
            # No AUC is computed; keep the row aligned with the loss rows so
            # the three curves still form a rectangular array when saved.
            aucs.append(np.nan)
    except KeyboardInterrupt:
        logger.info("Trainig interrupted on epoch %d" % epoch)
    finally:
        # Always stop the loader, even when training fails.
        transform.terminate()
        transform.join()

    elapsed_time = time.time() - now
    logger.info(
        "The best auc: %.5f obtained on epoch %d.\n The training took %d seconds."
        % (best_auc, best_epoch, elapsed_time))
    # Count each training image once per epoch (labels are not images).
    logger.info(" The average performance was %.1f images/sec" %
                (len(x_train) * float(epoch) / elapsed_time))

    # ``np.float`` was only an alias of the builtin and is gone from NumPy.
    results = np.array([train_loss, valid_loss, aucs], dtype=float)
    np.save(os.path.join(prefix, "training.npy"), results)
예제 #23
0
from transform import Transformer
import time
import sys

if __name__ == '__main__':
    # Benchmark Transformer.transform() on images of increasing size and
    # write one "<pixel count> <seconds>" line per image to timing-fft.txt.
    #
    # Each entry pairs an image file with its pixel count so the two can
    # never get out of step (they used to live in two parallel lists).
    images = [
        ('6A(142).BMP', 76800),  # 320x240
        ('vase.jpg', 128000),  # 320x400
        ('DUSS.BMP', 317856),  # 672x473
        ('round.jpg', 640000),  # 800x800
        ('jankaluza_dew_drop.jpg', 9838080),  # 3840x2562
        ('vovalente_fire.jpg', 17185645),  # 5077x3385
    ]
    try:
        # Load everything up front so a missing file aborts the run before
        # any timing output is produced.
        transformers = [Transformer(filename) for filename, _ in images]
    except FileNotFoundError as er:
        print(er)
        sys.exit()
    with open('timing-fft.txt', 'w') as out:
        out.write('# number TIME\n')
        for (_, size), transformer in zip(images, transformers):
            # perf_counter is monotonic and high resolution; time.time() can
            # jump when the system clock is adjusted.
            start = time.perf_counter()
            transformer.transform()
            elapsed = time.perf_counter() - start
            out.write(f'{size} {elapsed}\n')
            # Flush per measurement so finished timings survive a crash on
            # one of the larger images.
            out.flush()
예제 #24
0
 def run_comp():
     """Plot two photographs together with their plain and shifted FT."""
     vase = Transformer('vase.jpg')
     face = Transformer('face.jpg')
     images = (vase, face)

     vase.plot('Original vase')
     face.plot('Original face')

     # Plain transform of each image, plotted right after it is computed.
     for image in images:
         image.transform()
         image.plot_fft('FT')

     # Shifted version of each transform, in the same image order.
     for image in images:
         image.shift()
         image.plot_fft('Shifted FT')

     Transformer.show_all()

     # Drop every reference before forcing a collection.
     del vase, face, images, image
     gc.collect()
예제 #25
0
from log import logger
from combine import Combiner
from transform import Transformer
from core import SoxError
from core import SoxiError
from version import version as __version__
import os

# Demo: configure a chain of audio effects on a Transformer and render it
# from an input file to an output file. The calls are order-dependent.
# create transformer
tfm = Transformer()
# trim the audio between 5 and 10.5 seconds.
tfm.trim(5, 10.5)
# apply compression
tfm.compand()
# apply a fade in and fade out
tfm.fade(fade_in_len=1.0, fade_out_len=0.5)
# create the output file.
tfm.build('./input/audio.wav', './output/audio.aiff')
# see the applied effects
# NOTE(review): as a bare expression statement the value is discarded when
# this runs as a module; it is only echoed in an interactive session.
tfm.effects_log

# # create combiner
# cbn = Combiner()
# # pitch shift combined audio up 3 semitones
# cbn.pitch(3.0)
# # convert output to 8000 Hz stereo
# cbn.convert(samplerate=8000)
# # create the output file
# cbn.build(
#     ['input1.wav', 'input2.wav', 'input3.wav'], 'output.wav', 'concatenate'
# )
예제 #26
0
 def setUp(self):
     """Give each test a fresh Transformer and the project directory path."""
     self.transformer = Transformer()
     # "<this file>/../../" normalizes to the parent of the directory that
     # contains this file.
     relative_root = __file__ + "/../../"
     self.project_dir = os.path.abspath(relative_root)
예제 #27
0
def ingest(hdfsfile, file_no, datafolders):
    """Validate one HDFS file and write its rows out as ORC.

    Uses the module-level ``config``, ``spark``, ``sc`` and ``logger`` objects
    instead of taking them as arguments.

    The file is rejected (moved to the error archive and recorded in the
    error table) when it duplicates an archived file, when it is empty and
    its configuration sets ``empty_check`` to ``yes``, or when column-count
    or data-type validation reports an error. An empty file without that
    setting is archived and counts as a success.

    :param hdfsfile: path of the file to ingest
    :param file_no: number of the file, stored in the process info
    :param datafolders: collection with an ``add`` method; receives the data
        path of every file that was written successfully
    :return: ``'Success'``, a message starting with ``'Error - '`` when the
        file was rejected, or one starting with ``'Failed - '`` when an
        exception was raised along the way
    """

    def quarantine(reason):
        # Move the file to the error archive and record why in the error
        # table. Called only from inside the try block below, after
        # file_config, metadata and process_info have been bound.
        hdfsutil.move_to_error_archive(config, file_config, hdfsfile,
                                       process_info)
        metadata.log_error_table(spark, file_config, config, process_info,
                                 reason)

    try:
        process_info = {
            'process_start_timestamp':
            datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S'),
            'user_name': getpass.getuser(),
            'file_name': os.path.basename(hdfsfile),
            'file_no': file_no,
        }
        file_config = getFileConfig(hdfsfile, config)

        if not file_config:
            problem = "can not find matched file configuration for file %s!" % hdfsfile
            logger.error(problem)
            return 'Error - ' + problem

        metadata = Metadata(sc)

        # A file whose name is already archived is never ingested twice.
        if hdfsutil.checkDuplicate(config, file_config, hdfsfile):
            quarantine("duplicate file")
            return ('Error - a file with same name already exists in archive folder!'
                    ' Moved to error_archive "' +
                    process_info['error_archive_path'] + '".')

        metadata.loadKVFile(spark, config, file_config.get('kv_file'),
                            process_info)

        df = Reader(spark).read(hdfsfile, file_config, metadata, process_info)

        row_total = df.count()
        process_info['row_count'] = row_total
        logger.info("row count in file %s is %d" % (hdfsfile, row_total))

        if row_total == 0:
            # Empty files are an error only when the file configuration
            # explicitly asks for the check.
            if file_config.get('empty_check', "no").lower() != "yes":
                hdfsutil.move_to_archive(config, file_config, hdfsfile)
                logger.warn('%s is empty!' % hdfsfile)
                return 'Success'
            quarantine("file is empty")
            return ('Error - Empty File!'
                    ' Moved to error_archive "' +
                    process_info['error_archive_path'] + '".')

        # Stage 1: the column count has to match the metadata.
        validator = Validator(sc)
        validator.val_column_num(df.columns, metadata.data_types, process_info)
        val_error = validator.get_error()
        if val_error:
            quarantine(val_error)
            return ('Error - ' + val_error + ' Moved to error_archive "' +
                    process_info['error_archive_path'] + '".')

        # Stage 2: convert the data types, then validate the converted values.
        transformer = Transformer(sc)
        df = transformer.trans_data_types(spark, df, file_config, metadata,
                                          process_info)
        df = validator.val_data_types(spark, df, file_config, metadata,
                                      process_info)

        # Columns produced by the conversion carry a double-underscore prefix.
        shadow_columns = [name for name in df.columns if name.startswith('__')]

        writer = Writer(sc)
        val_error = validator.get_error()
        if val_error:
            # Keep only the failing rows, without the converted columns.
            error_df = df.where('length(_error_message) > 0')
            error_df = error_df.drop(*shadow_columns)
            writer.write_errorfile(error_df, config, file_config, process_info)

            df.unpersist()
            logger.error('file %s failed at data type validation' % hdfsfile)
            quarantine(val_error)
            return ('Error - ' + val_error + ' Moved to error_archive "' +
                    process_info['error_archive_path'] + '".' +
                    ' Error file path "' + process_info['error_file_path'] +
                    '".')

        # Success path: drop the columns named like a converted column minus
        # its first underscore, drop the error column, and stamp each row
        # with the source file name and the processing timestamp.
        source_columns = [name[1:] for name in shadow_columns]
        data_df = df.drop(*source_columns).drop('_error_message')
        data_df = data_df.withColumn('source_filename',
                                     lit(os.path.basename(hdfsfile)))
        data_df = data_df.withColumn(
            'process_timestamp',
            to_timestamp(lit(process_info['process_start_timestamp']),
                         'yyyy-MM-dd HH:mm:ss'))

        writer.write_orc(data_df, spark, config, metadata, process_info)
        datafolders.add(process_info['hdfs_datafile_path'])

        df.unpersist()
        hdfsutil.move_to_archive(config, file_config, hdfsfile)
        process_info['process_end_timestamp'] = datetime.utcnow().strftime(
            '%Y-%m-%d %H:%M:%S')
        metadata.log_registry_table(spark, file_config, config, process_info)
        logger.info('file %s has been successfully ingested' % hdfsfile)
        return 'Success'
    except Exception as e:
        logger.error('file %s ingestion failed! exception is %s' %
                     (hdfsfile, str(e)))
        return 'Failed - Exception happened! Please see Yarn log for details'
예제 #28
0
import sys
sys.path.append("/home/app/code/")

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from extract import Extract
from transform import Transformer
from load import Load
if __name__ == '__main__':
    # Extract -> transform -> load pipeline for the Covid data.
    session_builder = SparkSession.builder.appName("Covid App")
    session_builder = session_builder.config("spark.some.config.option",
                                             "some-value")
    spark = session_builder.getOrCreate()
    sql_context = SQLContext(spark)

    # Extract
    covid_df = Extract(spark).extract_covid_data()

    # Transform: data types first, then dimensions, then missing values.
    transformer = Transformer(covid_df, sql_context)
    transformer.data_types_transformations()
    cleaned_df = transformer.fill_na(transformer.dimensions_transfomations())

    # Load
    Load(cleaned_df).load_data()
예제 #29
0
파일: actor.py 프로젝트: lzw-pku/AutoML
class Actor:
    def __init__(self, args):
        """Build the estimator and the grammar transformer, start empty logs.

        ``args`` provides the estimator settings as attributes. Its ``path``
        is passed to the estimator and also kept on the instance.
        """
        estimator_settings = dict(
            emb_dim=args.emb_dim,
            n_hidden=args.n_hidden,
            bidirectional=args.bi,
            n_layer=args.n_layer,
            dropout=args.dropout,
            lr=args.lr,
            decay=args.decay,
            lr_p=args.lr_p,
            clip=args.clip,
            batch_size=args.batch,
            epoch_num=args.epoch_num,
            cuda=args.cuda,
            path=args.path,
        )
        self.estimator = Estimator(**estimator_settings)

        # The transformer starts from the Prolog grammar and its root rule.
        start_grammar = prolog_grammar.GRAMMAR_DICTIONARY
        start_rule = prolog_grammar.ROOT_RULE
        self.transformer = Transformer(start_grammar, start_rule)

        # One entry per evaluation / per applied action, in order.
        self.performances = []
        self.actions = []
        self.path = args.path

    def search(self):
        self.perform('initial')
        #exit(0)
        for i in range(25):
            print(i)
            try:
                self.step()
                self.perform(i)
            except BaseException as e:
                print(e)
                print(self.actions)
                print(self.performances)
                with open('gra.pkl', 'wb') as f:
                    pickle.dump(self.transformer.get_grammar_dict(), f)
                exit(-1)
        print(self.performances)
        #exit(0)

    def step(self):
        import time
        t1 = time.time()
        action_space = self.transformer.get_act_space()
        t2 = time.time()
        method = []
        i = -1
        while len(method) == 0:
            i = random.randint(0, 3)
            method = action_space[i]
        action = random.choice(method)
        print(i, action)
        if i == 0:
            self.transformer.creat_nt(action)
        elif i == 1:
            self.transformer.merge_nt(action)
        elif i == 2:
            self.transformer.combine_nt(*action)
        else:
            assert i == 3
            self.transformer.delete_prod(action)
        self.actions.append((i, action))

    def perform(self, name):
        grammar_dict, root_rule = self.transformer.get_grammar_dict()
        with open(os.path.join(self.path, f'grammar-{name}'), 'wb') as f:
            pickle.dump(self.transformer, f)
        perform = self.estimator.estimate(grammar_dict,
                                          root_rule,
                                          toy=False,
                                          name=repr(name))
        self.performances.append(perform)
        print(perform)
        return perform

    def exp(self, name):
        for _ in range(100):
            self.step()
        self.perform(name)

    def one(self):
        """Apply 50 random steps, then estimate the resulting grammar repeatedly.

        The grammar is read from the transformer once after the steps and
        passed to the estimator 10000 times under the name ``'tmp'``; every
        result is printed.
        """
        #with open(path, 'rb') as f:
        #    self.transformer = pickle.load(f)
        for i in range(50):
            self.step()
        # Read once: the same grammar is evaluated on every pass below.
        grammar_dict, root_rule = self.transformer.get_grammar_dict()
        for i in range(10000):
            perform = self.estimator.estimate(grammar_dict,
                                              root_rule,
                                              toy=False,
                                              name='tmp')
            print(perform)