Example #1
 def dataReceived(self,data):
     data = data.strip("\r\n")
     if data.startswith("vote"):
         self.factory.datastore.vote(data.split("|")[1])
     elif data.startswith("add"):
         args =  data.split("|")
         self.factory.datastore.add(args[1],args[2]) #name, path
Example #2
    def run(self, clf):
        X, y = data.split(self.data)
        skf_config = config.xvalidation
        self.skf = StratifiedShuffleSplit(y, **skf_config)
        self.X = X
        self.y = y

        report = Report(" | ".join([step[0] for step in clf.steps]))

        X, y = self.X, self.y

        y_true = []
        y_predicted = []

        for train_index, test_index in self.skf:
            X_train, y_train = X[train_index], y[train_index]
            X_test, y_test = X[test_index], y[test_index]

            y_true += list(y_test)

            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)

            y_predicted += list(y_pred)

        report.summary = classification_report(y_true,
                                               y_predicted,
                                               target_names=("dead", "alive"))
        cm = confusion_matrix(y_true, y_predicted)
        report.confusion_matrix = cm
        report.confusion_matrix_norm = cm.astype("float") / cm.sum(
            axis=1)[:, np.newaxis]
        report.accuracy = accuracy_score(y_true, y_predicted)

        return report
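
As an aside, the row-wise normalization in the run method above divides each row of the confusion matrix by its row sum, turning counts into per-class recall. A tiny NumPy illustration (the matrix values here are made up):

import numpy as np

cm = np.array([[8, 2],
               [1, 9]])
cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
print(cm_norm)
# [[0.8 0.2]
#  [0.1 0.9]]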
Example #3
    def run(self, clf):
        X, y = data.split(self.data)
        skf_config = config.xvalidation
        self.skf = StratifiedShuffleSplit(y, **skf_config)
        self.X = X
        self.y = y

        report = Report(" | ".join([step[0] for step in clf.steps]))

        X, y = self.X, self.y

        y_true = []
        y_predicted = []

        for train_index, test_index in self.skf:
            X_train, y_train = X[train_index], y[train_index]
            X_test, y_test = X[test_index], y[test_index]

            y_true += list(y_test)

            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)

            y_predicted += list(y_pred)

        report.summary = classification_report(y_true, y_predicted, target_names=("dead", "alive"))
        cm = confusion_matrix(y_true, y_predicted)
        report.confusion_matrix = cm
        report.confusion_matrix_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
        report.accuracy = accuracy_score(y_true, y_predicted)

        return report
Example #4
def test_split():
    indices = np.array([2, 4, 8, 9])
    expected = np.concatenate((indices, indices + 10))
    actual = np.array(list(data.split(indices, 10, range(20))))
    assert len(actual) == len(expected)
    assert actual.dtype == expected.dtype
    assert all(actual == expected)
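
The test above fixes the expected behavior of data.split: with a fold size of 10 over range(20), it yields the items at the given offsets within each consecutive block of 10. A minimal sketch that would satisfy this test, assuming that interpretation (it is not the project's actual implementation):

def split(indices, fold, seq):
    # Yield the items of seq whose offset within each block of `fold`
    # consecutive elements is one of `indices`.
    for pos, item in enumerate(seq):
        if pos % fold in indices:
            yield item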
Example #5
def main(file_size):
    """Generates the files needed to run tagWeighting.py (the locality script).
    """
    data_funcs_by_size = {
        'small': data.get_small,
        'medium': data.get_medium,
        'large': data.get_large
    }
    all_data = data_funcs_by_size[file_size]()
    train, test = data.split(all_data, 0.8)

    metadata = []
    photo = []
    uid = []
    for img in train:
        pictureID = img['watchlink'].strip()
        tags = img['tags']
        lat, lon = img['latitude'], img['longitude']
        userID = img['userID']

        metadata.append('{0}\t0\t0\t{1}'.format(pictureID, ', '.join(tags)))
        photo.append('0\t0\t{0}\t{1}'.format(lon, lat))
        uid.append('0\t{0}'.format(userID))

    write_to_file(metadata, file_size + '_train_metadata')
    write_to_file(photo, file_size + '_train_photo')
    write_to_file(uid, file_size + '_train_uid')
Example #6
	def process(self, opType):
		if(len(self.threads) == 0):
			self.responseLine.setText("No connection.")
			return
		# Get the data entered on the UI
		dt = self.vectorLine.text()

		if(len(dt) == 0):
			self.responseLine.setText("No data to process.")
			return

		#Creates sub-vectors
		dt = list(map(int, dt.split()))
		vectorOfSubvectors = self.vectorFragmentation(dt)
		resp = dt[0]

		if opType == "MAX":
			typeFunction = max
		elif opType == "MIN":
			typeFunction = min
		else:
			self.responseLine.setText("Type of function error")
			self.update()
			return

		for i in range(len(self.threads)):
			resp = typeFunction(resp, int(self.threads[i].sendData(opType + "#" + " ".join(str(x) for x in vectorOfSubvectors[i]))))

		self.responseLine.setText(opType + " VALUE: " + str(resp))
		self.update()
Example #7
def save_layout_file(name, view, layout, controls):
    dir = java.io.File('layouts')
    make_layout_dir(dir)

    fn = 'layouts/%s.layout' % name
    # Check if file exists
    # - If it does, extract java layout information, otherwise just make a new file
    java_layout = ""
    if java.io.File(fn).exists():
        # Save a backup of the layout file
        fp = java.io.File(fn).getCanonicalPath()
        copyfile(fp, fp + '.bak')

        f = file(fn, 'r')
        data = f.read()
        for line in data.split('\n'):
            if line.startswith('#'):
                java_layout += '\n' + line
        f.close()

    f = file(fn, 'w')

    layout_text = ',\n  '.join([repr(x) for x in layout])

    f.write('(%s,\n [%s],\n %s) %s' % (view, layout_text, controls, java_layout))
    f.close()
Example #8
def load_saved_lf(filename='lf_data.dat'):
    lf_lines = open(filename, 'r').readlines()
    dic = {}
    for line in lf_lines:
        if line.startswith('#'): continue
        line = line.strip()
        filter = line.split(' ')[0]
        region = line.split(' ')[1]
        data = line[7:]
        bins = map(float, data.split(';')[0].split())
        hist = map(float, data.split(';')[1].split())
        if not filter in dic.keys(): dic[filter] = {}
        if not region in dic[filter].keys(): dic[filter][region] = {}
        #if not 'bins' in dic[filter][region].keys(): dic[filter][region]['bins'] = {}
        dic[filter][region] = {'bins': bins, 'hist': hist}
    return dic
Example #9
def save_layout_file(name, view, layout, controls):
    dir = java.io.File('layouts')
    make_layout_dir(dir)

    fn = 'layouts/%s.layout' % name
    # Check if file exists
    # - If it does, extract java layout information, otherwise just make a new file
    java_layout = ""
    if java.io.File(fn).exists():
        # Save a backup of the layout file
        fp = java.io.File(fn).getCanonicalPath()
        copyfile(fp, fp + '.bak')

        f = file(fn, 'r')
        data = f.read()
        for line in data.split('\n'):
            if line.startswith('#'):
                java_layout += '\n' + line
        f.close()

    f = file(fn, 'w')

    layout_text = ',\n  '.join([repr(x) for x in layout])

    f.write('(%s,\n [%s],\n %s) %s' %
            (view, layout_text, controls, java_layout))
    f.close()
Example #10
def main():

    print("load csv into a pandas dataframe")
    dt = data_loader()
    data = dt.load_data()
    print(f"data has {data.shape}")
    data = dt.encode_target(data)
    print(
        "preprocess data by removing outliers and encoding feature variables")
    data = dt.preprocess(data)
    #print(data.columns)
    print(
        "scale data using standardscaler and encoding using pandas get_dummies"
    )
    data = dt.scale_columns(data)
    print(f"data contains {data.columns}")

    sam = resample_data()
    data = sam.under_sample(data)

    print(data['y'].value_counts())

    s = split()
    data = s.train_test(data)
    print(data[0].shape)

    classifiers_cv(data[0], data[1], data[2], data[3])
Example #11
def parse_to_dict(data):
    programs = {}
    for line in data.split('\n'):
        key, values = line.split(' <-> ')
        key = int(key)
        values = tuple((int(v) for v in values.split(',')))
        programs[key] = values
    return programs
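
A quick usage example for the parser above, with a tiny made-up input in the "key <-> v1, v2" format it expects:

sample = "0 <-> 2\n1 <-> 1\n2 <-> 0, 3"
print(parse_to_dict(sample))
# {0: (2,), 1: (1,), 2: (0, 3)}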
Example #12
def parse_text(data):
    parts = []

    for pipe in data.split('\n'):
        a, b = pipe.split('/')
        parts.append(Pipe(int(a), int(b)))

    return Parts(parts)
Example #13
    def train_test_split(self, X, y, eval_size):
        if eval_size:
            X_train, X_valid, y_train, y_valid = data.split(
                X, y, test_size=eval_size)
        else:
            X_train, y_train = X, y
            X_valid, y_valid = X[len(X):], y[len(y):]

        return X_train, X_valid, y_train, y_valid
Example #14
    def train_test_split(self, X, y, eval_size):
        if eval_size:
            X_train, X_valid, y_train, y_valid = data.split(
                X, y, test_size=eval_size)
        else:
            X_train, y_train = X, y
            X_valid, y_valid = X[len(X):], y[len(y):]

        return X_train, X_valid, y_train, y_valid
Example #15
def compare_lenght(in_path):
    seq_dict = imgs.read_seqs(in_path)
    len_dict = get_len_dict(seq_dict)
    train, test = data.split(len_dict.keys())
    train, test = by_cat(train), by_cat(test)
    for cat_i in train.keys():
        train_i = np.mean([len_dict[name_i] for name_i in train[cat_i]])
        test_i = np.mean([len_dict[name_i] for name_i in test[cat_i]])
        print("%d,%.2f,%.2f" % (cat_i, test_i, train_i))
Example #16
def test():
    clf = RandomForest
    X_train, y_train = data.split(data.train)
    X_test = data.extract_features(data.test)

    clf.fit(X_train, y_train)
    labels = clf.predict(X_test)
    pd.DataFrame({
        "PassengerId": np.array(data.test["PassengerId"]),
        "Survived": labels
    }).to_csv("submit.csv", index=False)
Example #17
def person_model(in_path, out_path, n_epochs=100):
    seq_dict = imgs.read_seqs(in_path)
    train, test = data.split(seq_dict.keys())
    persons = [data.parse_name(name_i)[1] - 1 for name_i in train]
    persons = keras.utils.to_categorical(persons)
    X, y = to_dataset(train, seq_dict)
    n_cats, n_channels = y.shape[1], X.shape[-1]
    model = models.make_exp(n_cats, n_channels)
    model.summary()
    model.fit(X, y, epochs=n_epochs, batch_size=256)
    model.save(out_path)
Example #18
def test():
    clf = RandomForest
    X_train, y_train = data.split(data.train)
    X_test = data.extract_features(data.test)

    clf.fit(X_train, y_train)
    labels = clf.predict(X_test)
    pd.DataFrame({
        "PassengerId": np.array(data.test["PassengerId"]),
        "Survived": labels
    }).to_csv("submit.csv", index=False)
Example #19
def agum_template(raw_path,agum_path,agum,n_iters=10):
    raw_data=imgs.read_seqs(raw_path)
    train,test=data.split(raw_data.keys())
    train_data={ name_i:raw_data[name_i] for name_i in train}
    agum_dict={}
    for name_i,seq_i in list(train_data.items()):
        agum_seq_i = agum(images=seq_i)
        for j in range(n_iters):
            new_name_i=name_i+'_'+str(j)
            print(new_name_i)
            agum_dict[new_name_i]=agum_seq_i
    new_dict={**raw_data,**agum_dict}
    imgs.save_seqs(new_dict,agum_path)
Example #20
    def apply_physics_properties_to_shape( self, node, shape ):

        shape.restitution = DEFAULT_RESTITUTION
        shape.density = DEFAULT_DENSITY
        shape.friction = DEFAULT_FRICTION

        data = node.getAttribute('physics_shape' )

        if data:
            keyvalues = data.split(',')
            for keyvalue in keyvalues:
                key,value = keyvalue.split('=')
                value = self.cast_value( value )
                setattr( shape, key, value )
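
The physics_shape attribute consumed above is a comma-separated list of key=value pairs. A standalone sketch of just that parsing step, with illustrative names not taken from the original code:

def parse_physics_attribute(text):
    # "density=2.0,friction=0.3" -> {"density": "2.0", "friction": "0.3"}
    # (the original additionally casts each value via self.cast_value)
    props = {}
    if text:
        for keyvalue in text.split(','):
            key, value = keyvalue.split('=')
            props[key] = value
    return props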
Example #21
def load_data(in_path, split=True):
    feat_dict = single.read_frame_feats(in_path)
    if (split):
        train, test = data.split(feat_dict.keys())
        train, test = prepare_data(train,
                                   feat_dict), prepare_data(test, feat_dict)
        params = {
            'ts_len': train[0].shape[1],
            'n_feats': train[0].shape[2],
            'n_cats': train[1].shape[1]
        }
        return train, test, params
    else:
        names = list(feat_dict.keys())
        return prepare_data(names, feat_dict), names
Example #22
 def run(self, msg, user):
     c = httplib2.HTTPSConnectionWithTimeout("api.mrbesen.de")
     c.request("GET", "/pos.php")
     response = c.getresponse()
     if response.code == 200:
         data = response.read().decode().strip()
         time, loc, lastknowntime, lastknownloc = data.split("\n")
         if loc == 'unknown':
             print("unknown location")
             datestr = self.getTimeStr(time)
             # German: "So on <date> I was in <lastknownloc>, but right now I'm on the move."
             return "Also am " + datestr + " war ich in " + lastknownloc + " aber grade bin ich Unterwegs."
         else:
             datestr = self.getTimeStr(time)
             # German: "I'm currently (<date>) in <loc>."
             return "Bin gerade (" + datestr + ") in " + loc
     return None
Example #23
def sim_model(in_path,out_path):
    full=img_dataset(in_path)
    train,test=data.split(full.keys())
#    train={ name_i:full[name_i] for name_i in train}
    X0,X1,y=[],[],[]
    for i,name_i in enumerate(train):
        for name_j in train[i:]:
            y_k=int(name_i.split("_")[0]==name_j.split("_")[0])
            X0.append( full[name_i])
            X1.append(full[name_j])
            y.append(y_k)
    X=[np.array(X0),np.array(X1)]
    sim_metric,model=sim.frames.make_five(20,1)
    sim_metric.fit(X,y,epochs=250,batch_size=100)
    if(out_path):
        model.save(out_path)
Example #24
def get_data():
    global times
    global tidalVolume
    global oldtidalVolume
    global peakPressure
    global oldpeakPressure
    global respirationRate
    global oldrespirationRate
    global oldTime
    global maxTime
    global corrupt
    global PEEP
    maxTime = 0
    startingTime = time.time()
    while (corrupt and (int(time.time() - startingTime) <= 1)):
        try:
            ser = serial.Serial('COM3', baudrate)
            corrupt = False
            while True:
                while (ser.inWaiting() == 0):
                    pass
                value = ser.readline()
                try:
                    data = str(value.decode("utf-8"))
                    data = data.split(",")
                    dataTime = int(data[0])
                    signal1 = int(data[1]) - 500
                    update_level(dataTime, 0, signal1, 0)
                except:
                    pass
        except:
            pass
    if corrupt == True:
        currTime = 0.0
        while True:

            if currTime / 1000 not in data1.keys():
                currTime = 0.0
                maxTime = 0
            #pp = data1[currTime/1000]/30
            pp = 45
            rr = data2[currTime / 1000] / 12
            tv = data3[currTime / 1000] / 3
            update_level(currTime, pp, rr, tv)
            currTime += 10
            time.sleep(0.01)
Example #25
def get_data(file_size):
    """Fetches training and test data.

    Args:
        file_size: 'small', 'medium', or 'large' indicating the size of the desired dataset

    Returns:
        (train_data, test_data) where train_data and test_data are lists of data points (each data point is a dict)
    """
    data_funcs_by_size = {
        'small': data.get_small,
        'medium': data.get_medium,
        'large': data.get_large
    }
    all_data = data_funcs_by_size[file_size]()
    train_data, test_data = data.split(all_data, 0.8)
    return train_data, test_data
Example #26
    def _ParseCookie(self, cookie):
        """Parses the cookie and returns NULL_COOKIE if it's invalid.

    Args:
      cookie: The text of the cookie.

    Returns:
      A map containing the values in the cookie.
    """
        try:
            (hashed, data) = cookie.split('|', 1)
            # global cookie_secret
            if hashed != str(hash(cookie_secret + data) & 0x7FFFFFF):
                return self.NULL_COOKIE
            values = data.split('|')
            return {
                COOKIE_UID: values[0],
                COOKIE_ADMIN: values[1] == 'admin',
                COOKIE_AUTHOR: values[2] == 'author',
            }
        except (IndexError, ValueError):
            return self.NULL_COOKIE
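
For context, the cookie parsed above has the layout <hash>|<uid>|<admin flag>|<author flag>, where the hash covers everything after the first separator. A hypothetical builder that would produce cookies this parser accepts (not part of the original code):

def build_cookie(uid, is_admin, is_author, secret=cookie_secret):
    # Mirrors _ParseCookie: data is "uid|admin|author" (empty flags when
    # not set), prefixed with a truncated hash of secret + data.
    data = '|'.join([uid,
                     'admin' if is_admin else '',
                     'author' if is_author else ''])
    return str(hash(secret + data) & 0x7FFFFFF) + '|' + data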
Example #27
  def _ParseCookie(self, cookie):
    """Parses the cookie and returns NULL_COOKIE if it's invalid.

    Args:
      cookie: The text of the cookie.

    Returns:
      A map containing the values in the cookie.
    """
    try:
      (hashed, data) = cookie.split('|', 1)
      # global cookie_secret
      if hashed != str(hash(cookie_secret + data) & 0x7FFFFFF):
        return self.NULL_COOKIE
      values = data.split('|')
      return {
          COOKIE_UID: values[0],
          COOKIE_ADMIN: values[1] == 'admin',
          COOKIE_AUTHOR: values[2] == 'author',
      }
    except (IndexError, ValueError):
      return self.NULL_COOKIE
Example #28
def load_params(train_prog, train_exe, place, logger, args=None):
    if not args.para_load_dir:
        return
    logger.info('loading para from {}'.format(args.para_load_dir))
    param_list = train_prog.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]
    for data in listDir(args.para_load_dir):
        slot = int(data.split('_')[1].split('.')[0])
        with open(data, 'rb') as fin:
            if six.PY2:
                p_array = pickle.load(fin)
            else:
                p_array = pickle.load(fin, encoding='bytes')
            p_array = p_array.reshape((-1))
            offset = 0
            for name in name_dict:
                s = name_dict[name]
                if s == slot:
                    card = 0
                    #for scope in [train_exe.scope]:#train_exe.executor.local_scopes():
                    for scope in train_exe.executor.local_scopes():
                        tensor = scope.find_var(name).get_tensor()
                        shape = tensor.shape()
                        tensor_len = np.prod(shape)
                        new_array = p_array[offset:offset + tensor_len]
                        new_array = new_array.reshape(shape)
                        if args.use_gpu:
                            placex = fluid.CUDAPlace(card)
                        else:
                            placex = fluid.CPUPlace()
                        tensor.set(new_array.astype(np.float32), placex)
                        logger.info('card {} loaded {}[{}] from {}[{}:{}]'.
                                    format(card, name, shape, data, offset,
                                           offset + tensor_len))
                        card = card + 1
                    offset += tensor_len
Example #29
def cmd_tio(self, msg, args, stdin):
    '''
    Run {prefix}tio [language] [code] to evaluate code in a given language on
    Try it online! (https://tio.run/). Specify additional sections on a
    separate line consisting of three hashes (###) followed by the section
    name, which can be any of: stdin (provide input), arg (provide any number
    of command line arguments), or stderr (specify this section to view stderr
    output in addition to stdout; you may also retroactively do this with
    {prefix}tio err).
    '''
    err = " (try `{}help tio` for more information)".format(self.prefix)
    if not args: return 'Basic usage: {}tio [lang] [code]'.format(self.prefix) + err
    if args == 'err': return self.tioerr
    lang, *rest = args.split(None, 1)
    rest = rest[0] if len(rest) else ''
    stdin = ''
    stderr = False
    args = []
    code, *parts = rest.split('\n###')
    for part in parts:
        name, data = part.split(None, 1) if '\n' in part or ' ' in part else (part, '')
        name = name.strip()
        if name == 'stdin': stdin = data
        elif name == 'stderr': stderr = True
        elif name == 'arg': args.append(data)
        else: return "Unknown section `{}`".format(name) + err
    try:
        data = requests.post('https://tio.run/cgi-bin/run/api/', zlib.compress(bytes('Vlang\u00001\u0000{}\u0000F.code.tio\u0000{}\u0000{}F.input.tio\u0000{}\u0000{}Vargs\u0000{}{}\u0000R'.format(lang, len(bytes(code, 'utf-8')), code, len(bytes(stdin, 'utf-8')), stdin, len(args), (len(args) * '\u0000{}').format(*args)), 'utf-8'), 9)[2:-4], timeout=5).content.decode('utf-8')
        data = data.split(data[:16])[1:]
        if len(data) == 1: return data[0]  # error
        dout, derr = [x.strip('\n') for x in data[:2]]
        self.tioerr = derr
        haserr = re.search('\nReal time: \\d+\\.\\d+ s\nUser time: \\d+\\.\\d+ s\nSys\\. time: \\d+\\.\\d+ s\nCPU share: \\d+\\.\\d+ %\nExit code: \\d+$', data[1]).start() > 0
        return (dout+'\n--- stderr ---\n'+derr if stderr else dout+('\n[stderr output - use {}tio err to view]'.format(self.prefix) if haserr else '')) or '[no output]'
    except requests.exceptions.ConnectionError:
        return '5 second timeout reached.'
Example #30
#   digits_session_2_dataset
#   digits_session_3_dataset
#   digits_session_4_dataset
#
# sequence_groups[i] contains data for class i
# 4-dimensional data structure: (class, sequence_num, timestep, channel_num)

sequence_groups = transform_data(data.digits_session_4_dataset())

# Split sequence_groups into training and validation data
#training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Manually selecting different training and validation datasets
training_sequence_groups = transform_data(data.digits_session_1_dataset())
validation_sequence_groups = transform_data(data.digits_session_4_dataset())
validation_sequence_groups = data.split(validation_sequence_groups, 1 / 6.)[1]
# Pads or truncates each sequence to length
length = 600
training_sequence_groups = data.transform.pad_truncate(
    training_sequence_groups, length)
validation_sequence_groups = data.transform.pad_truncate(
    validation_sequence_groups, length)

# Format into sequences and labels
train_sequences, train_labels = data.get_inputs(training_sequence_groups)
val_sequences, val_labels = data.get_inputs(validation_sequence_groups)

# Calculate sample weights
class_weights = compute_class_weight('balanced', np.unique(train_labels),
                                     train_labels)
train_weights = class_weights[list(train_labels)]
Example #31
#sequence_groups = data.combine([
#        data.process(10, ['data/data/2_subvocal_digits_9_trials.txt']),
#        data.process(10, ['data/data/3_subvocal_digits_11_trials.txt']),
#        data.process(10, ['data/data/4_subvocal_digits_10_trials.txt']),
#        data.process(10, ['data/data/5_subvocal_digits_10_trials.txt']),
#        data.process(10, ['data/data/6_subvocal_digits_10_trials.txt']),
#    ])

#sequence_groups = sequence_groups[:2]

sequence_groups = data.transform.default_transform(sequence_groups)

#sequence_groups = np.array(map(lambda x: x[30:], sequence_groups))

# Split into training and validation data
training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Augment training data by varying positioning of padding/truncating

# Uncomment
#training_sequence_groups = data.transform.pad_extra(training_sequence_groups, length)
#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 10)
#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 100)

#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 50)
training_sequence_groups = data.transform.pad_truncate(training_sequence_groups, length)

#training_sequence_groups = data.transform.augment(training_sequence_groups,
#                                                  [(data.transform.gaussian_filter, [3], {})],
#                                                  include_original=True)
Example #32
		clf.fit(x_train,y_train)
		# result = pd.DataFrame.from_dict(clf.cv_results_)
		# with open(m[0]+'.csv','w') as f:
		# 	result.to_csv(f)
		print('The parameters of the best '+m[0]+' are: ')
		print(clf.best_params_)
		y_pred = clf.predict(x_train)
		print(classification_report(y_true=y_train, y_pred=y_pred))
		y_test_pred = clf.predict(x_test)
		# print(classification_report(y_true=y_test, y_pred=y_test_pred))
		# df_test_y = pd.DataFrame(y_test_pred , columns=['Survived'])
		df = pd.DataFrame(data.get_test_PassengerId()).join(pd.DataFrame(y_test_pred , columns=['Survived']))
		print(df.head())
		df.to_csv('./titanic_test_result_'+m[0]+'.csv',index=False)

import data,preprocess
if '__main__' == __name__:
	train_data = data.get_train_data()
	train_data =preprocess.fill_missing_data(train_data ,sex_cat=True, embarked_one_hot=True)
	# train_data = preprocess.feature_selection(train_data)
	# train_data = preprocess.detect_outlier(train_data,drop=True)
	print(train_data.head())
	x_train,y_train = data.split(train_data)
	# print(y_train.values)
	x_test,y_test = data.get_test_x(),data.get_test_y()
	x_test =preprocess.fill_missing_data(x_test,is_train=False,sex_cat=True, embarked_one_hot=True)
	# poly = PolynomialFeatures(2,interaction_only=True)
	# x_train = poly.fit_transform(x_train.values)
	# x_test = poly.fit_transform(x_test.values)
	model_selection(x_train.values,y_train.values,x_test.values,y_test.values)
	
Example #33
def split_step(preprocessed_data_path, train_pct):
    from data import split
    train_data_path, test_data_path = split(preprocessed_data_path, train_pct)
    return train_data_path, test_data_path
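
A minimal usage sketch for the wrapper above; the path and percentage are made up, and data.split is assumed to return the two output paths exactly as in the snippet:

train_path, test_path = split_step("data/preprocessed.csv", train_pct=0.8)
print(train_path, test_path)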
Example #34
# Update internal cache of parameters
params['num_samples'] = num_samples
params['num_features'] = num_features
params['num_frames'] = num_frames
params['x_scaler'] = MinMaxScaler(feature_range=(-1, 1))
params['y_scaler'] = MinMaxScaler(feature_range=(-1, 1))
params['num_testcases'] = 2

X = empty((num_frames, num_timesteps, num_features))
Y = empty((num_frames, num_predictions))
for i in range(num_samples - num_timesteps):
    X[i] = non_stationary[i:i + num_timesteps, ]
    Y[i] = non_stationary[i + num_timesteps:i + num_timesteps +
                          num_predictions, 0]
Y
X_scaled = np.array(
    [params['x_scaler'].fit_transform(X[i]) for i in range(X.shape[0])])
Y_scaled = params['y_scaler'].fit_transform(Y)
Y_scaled
X_train, Y_train, X_test, Y_test = data.split(X_scaled, Y_scaled,
                                              params['num_testcases'])
Y_test
y_unscaled = params['y_scaler'].inverse_transform(Y_test)
y_undiff = data.inverse_diff(y_unscaled, a[-(params['num_testcases'] + 1):,
                                           0:1])[-params['num_testcases']:]
y_undiff

print(data.recover_Ytest(Y_test, a, params))

# hi
Example #35
N_CLASSES = 10
HIDDEN_SIZE = 256

CONV_L2 = 5e-4
FC_L2 = 1e-3
DROPOUT = 0.5

LR = 0.1
BATCH_SIZE = 128
EPOCHS = 300

TEST_TIME_K = 5

x_train, y_train, x_test, y_test = data.load_numpy()

x_train, y_train, _, _ = data.split(x_train, y_train, PERCENTAGE)

N_TRAIN_BATCHES = int(x_train.shape[0] / BATCH_SIZE)
N_TEST_BATCHES = int(x_test.shape[0] / BATCH_SIZE)

model = learning.cnn_classifier(number_of_classes=N_CLASSES,
                                hidden_layer_size=HIDDEN_SIZE,
                                conv_l2=CONV_L2,
                                fc_l2=FC_L2,
                                drop_out=DROPOUT)

opt = SGD(learning_rate=LR, momentum=0.9)

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
Example #36
#    data.combine([
#            map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#            map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#            map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
#        ]))
#
#print map(len, sequence_groups)

#lens = map(len, data.get_inputs(sequence_groups)[0])
#print min(lens), np.mean(lens), max(lens)

# Split sequence_groups into training and validation data
#training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Manually selecting different training and validation datasets
training_sequence_groups, validation_sequence_groups = data.split(
    data.digits_session_dependence_3_dataset(channels=range(1, 8)), 1. / 6)
training_sequence_groups = transform_data(
    data.combine([
        #            map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
        #            map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
        #            map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        training_sequence_groups,
    ]))
validation_sequence_groups = transform_data(
    data.combine([
        #            map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
        #            map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
        #            map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        validation_sequence_groups,
    ]))
Example #37
import pandas as pd
from data import get_dataset, split
from detl.mydb import db_client
from svm import SVMClassifier, confusion_matrix, accuracy

with db_client().as_default():

    digits = get_dataset()

    X_train, X_test, y_train, y_test = split(digits)

    classifier = SVMClassifier()

    classifier.fit(X_train, y_train)

    pred = classifier.predict(X_test)

    print(confusion_matrix(y_test, pred).data)
    print('Accuracy', accuracy(y_test, pred).data)