Example #1
    def learn(self, dataset_name=''):
        
        if not dataset_name:
            dataset = Dataset.create_ds(self.options, prefix='learn')
        else:               
            dataset = Dataset.get_ds(self.options, dataset_name)

        try:
            while True:
                controls = self.c.getUpdates()
                state = self.izzy.getState()
                self.update_gripper(controls)

                controls = self.controls2simple(controls)
                if not all(int(c) == 0 for c in controls):
                    frame = self.bc.read_frame(show=self.options.show, record=self.options.record, state=state)
                    dataset.stage(frame, controls, state)
                
                print "supervisor: " + str(controls)
                time.sleep(0.05)
                
        except KeyboardInterrupt:
            pass
        
        dataset.commit()
        if self.options.record:
            self.bc.save_recording()
Example #2
	def read_data(dirname, return_dataset=False):
		
		ds = Dataset(dirname, Reader.get_classes())
		emails, classes = [], []

		for sentences, email_type in ds.get_text():
			ds.build_vocabulary(sentences)
			emails.append(sentences)
			classes.append(email_type)

		
		# transform words to indices
		emails = [list(map(ds.get_word_indices().get, s)) for s in emails]

		# count how many times a word appears with the ith class
		counts = np.zeros((len(ds.vocabulary), len(set(classes))))
		for i, e in enumerate(emails):
			for w in e:
				counts[w, classes[i]] += 1 


		# emails = ds.bag_of_words(emails) # using bow we don't need counts

		if return_dataset:
			return np.array(emails), np.array(classes), counts, ds
		return np.array(emails), np.array(classes), counts
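
The two commented steps above (mapping words to indices, then counting word/class co-occurrences) can be illustrated with a small standalone sketch; the vocabulary and labels below are made up for illustration and are not part of the Reader/Dataset classes.

import numpy as np

# Hypothetical toy data, only to show the shape of the two steps above.
word_indices = {'free': 0, 'money': 1, 'meeting': 2}
emails = [['free', 'money', 'free'], ['meeting']]
classes = [1, 0]

# transform words to indices
emails_idx = [list(map(word_indices.get, e)) for e in emails]

# count how many times each word index occurs with the ith class
counts = np.zeros((len(word_indices), len(set(classes))))
for i, e in enumerate(emails_idx):
    for w in e:
        counts[w, classes[i]] += 1

print(emails_idx)  # [[0, 1, 0], [2]]
print(counts)      # rows: free -> [0. 2.], money -> [0. 1.], meeting -> [1. 0.]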
Example #3
	def load_dataset(self):
		d = Dataset(self._logger)
		self._logger.info("Loading dataset...")
		self.X, self.y = d.load_csvs_from_folder(CSV_DIR)
		self._logger.info("Done loading dataset")
		self._logger.debug(str(self.X.shape))
		self._logger.debug(str(self.y.shape))
Example #4
    def train(self, session, dataset: Dataset):
        word_vectors = []
        for tokens in dataset.get_tokens():
            word_vectors.append(self.encode_word_vector(tokens))

        slot_vectors = []
        for iob in dataset.get_iob():
            slot_vectors.append(self.encode_slot_vector(iob))

        cost_output = float('inf')
        for _ in range(self.__step_per_checkpoints):
            indexes = np.random.choice(len(word_vectors), self.__batch_size, replace=False)
            x = [word_vectors[index] for index in indexes]
            y = [slot_vectors[index] for index in indexes]

            self.__step += 1
            _, cost_output = session.run([self.__optimizer, self.__cost],
                                         feed_dict={self.__x: x,
                                                    self.__y: y,
                                                    self.__dropout: 0.5})

        checkpoint_path = os.path.join('./model', "slot_filling_model.ckpt")
        self.__saver.save(session, checkpoint_path, global_step=self.__step)

        return cost_output
Example #5
    def dataset(self, mess, args):
        """Send a CSV dataset to your email address.
           arguments: date1 (Y-m-d) date2 (Y-m-d) sampling period (seconds)
           e.g.: dataset 2017-09-01 2017-09-02 600
        """
        knownUsers = self.unPickle("knownUsers")

        if len(args.split(' ')) == 3:
            dstart = args.split(' ')[0].strip().lower()
            dend = args.split(' ')[1].strip().lower()
            step = float(args.split(' ')[2].strip().lower())
        else:
            return 'not enough arguments'

        user = mess.getFrom().getNode()
        if user in knownUsers:
            try:
                dstart = dt.strptime(dstart, "%Y-%m-%d")
            except ValueError:
                return "ValueError : time data '%s'" % dstart + " does not match format '%Y-%m-%d'"

            try:
                dend = dt.strptime(dend, "%Y-%m-%d")
            except ValueError:
                return "ValueError : time data '%s'" % dend + " does not match format '%Y-%m-%d'"

            dataset = Dataset(self, mess.getFrom(), dstart.isoformat(), dend.isoformat(), step)
            dataset.start()
            return "Generating the dataset ..."

        else:
            return "Do I know you ? Send me your email address by using the command record "
Example #6
def main():

  parser = argparse.ArgumentParser(description='Remove a dataset from the cache')
  parser.add_argument('dataset_name', action="store")

  result = parser.parse_args()
  ds = Dataset(result.dataset_name)
  ds.removeDataset()
Example #7
def fullsim(name, nEvents, sigma, sigmaRelErr, filters, inputNames = None):
    if not inputNames:
        dataset = Dataset(name, [name], Dataset.FULLSIM, nEvents, sigma, sigmaRelErr, filters)
    else:
        dataset = Dataset(name, inputNames, Dataset.FULLSIM, nEvents, sigma, sigmaRelErr, filters)

    dataset.entryList = 'hardPhotonList'
    return dataset
Example #8
    def test_dataset(self):
        cxn = yield connectAsync()
        context = yield cxn.context()
        dir = ['', 'Test']
        dataset = 1
        datasetName = 'Rabi Flopping'
        d = Dataset(cxn, context, dataset, dir, datasetName, None)
        d.openDataset()
        d.getData()
Example #9
    def test_import_dataset(self):

        ## The Pixelman dataset object.
        pds = Dataset("testdata/ASCIIxyC/")

        # The tests.

        # The number of datafiles.
        self.assertEqual(pds.getNumberOfDataFiles(), 5)
Example #10
	def __init__ (self, filename="93-15_top_9.npz"):
		data = np.load(filename)
		self.teams = data["teams"]
		Dataset.__init__(self, len(self.teams))
		self.pairwise_probs = data["probs"]
		print(len(self.teams))
		
		ranking = SE(len(self.teams), self.pairwise_probs)
		self.order = ranking.get_ranking()
Example #11
def main():
    while True:
        data_set_name = input("Please provide the name of the data set you want to work with: ")

        # Load, Randomize, Normalize, Discretize Dataset
        data_set = Dataset()
        data_set.read_file_into_dataset("C:\\Users\\Grant\\Documents\\School\\Winter 2016\\CS 450\\Prove03\\" + data_set_name)
        data_set.randomize()
        data_set.data = normalize(data_set.data)
        data_set.discretize()

        data_set.set_missing_data()

        # Split Dataset
        split_percentage = 0.7
        data_sets    = split_dataset(data_set, split_percentage)
        training_set = data_sets['train']
        testing_set  = data_sets['test']

        # Create Custom Classifier, Train Dataset, Predict Target From Testing Set
        id3Classifier = ID3()
        id3Classifier.train(training_set)
        predictions = id3Classifier.predict(testing_set)

        id3Classifier.display_tree(0, id3Classifier.tree)

        # Check Results
        my_accuracy = get_accuracy(predictions, testing_set.target)
        print("Accuracy: " + str(my_accuracy) + "%")

        # Compare To Existing Implementations
        dtc = tree.DecisionTreeClassifier()
        dtc.fit(training_set.data, training_set.target)
        predictions = dtc.predict(testing_set.data)

        dtc_accuracy = get_accuracy(predictions, testing_set.target)
        print("DTC Accuracy: " + str(dtc_accuracy) + "%")

        # Do another or not
        toContinue = False

        while True:
            another = input("Do you want to examine another dataset? (y / n) ")

            if another != 'y' and another != 'n':
                print("Please provide you answer in a 'y' or 'n' format.")
            elif another == 'y':
                toContinue = True
                break
            else:
                toContinue = False
                break

        if not toContinue:
            break
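
The get_accuracy helper is referenced above but not shown; a plausible minimal version (an assumption for illustration, not the author's code) just compares predictions against the targets and returns a percentage:

def get_accuracy(predictions, targets):
    # fraction of matching labels, expressed as a percentage
    correct = sum(1 for p, t in zip(predictions, targets) if p == t)
    return 100.0 * correct / len(targets)

print(get_accuracy([0, 1, 1, 0], [0, 1, 0, 0]))  # 75.0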
Example #12
	def __init__(self, data_path=None):

		self.data_path = data_path
		ds = Dataset(is_binary=True)
		ds.setup_dataset(data_path=self.data_path, train_split_scale=0.6)

		self.X = ds.Xtrain
		self.y = ds.Ytrain

		self.y = np.cast['uint8'](list(self.y))
		self.X = np.cast['float32'](list(self.X))
Example #13
    def test_parse_iob(self):
        test = 'i would like to go from (Columbia University)[from_stop] to (Herald Square)[to_stop]'
        expected_iob = ['o', 'o', 'o', 'o', 'o', 'o', 'b-test.from_stop', 'i-test.from_stop', 'o', 'b-test.to_stop',
                        'i-test.to_stop']
        expected_tokens = ['i', 'would', 'like', 'to', 'go', 'from', 'columbia', 'university', 'to', 'herald', 'square']
        actual_iob, actual_tokens = Dataset.parse_iob('test', test)
        self.assertEqual(actual_iob, expected_iob)
        self.assertEqual(actual_tokens, expected_tokens)

        for slot in set(expected_iob):
            self.assertIn(slot, Dataset.get_slots())
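
The test above implies that Dataset.parse_iob turns "(phrase)[slot]" annotations into lower-cased tokens plus b-/i- IOB tags prefixed with the domain name. A simplified re-implementation, for illustration only (not the project's actual parser), could look like this:

import re

def parse_iob_sketch(domain, text):
    # Simplified stand-in for Dataset.parse_iob (illustration only):
    # "(Columbia University)[from_stop]" -> tokens ['columbia', 'university']
    # tagged ['b-<domain>.from_stop', 'i-<domain>.from_stop']; everything else is 'o'.
    iob, tokens = [], []
    for slot_text, slot_name, word in re.findall(r'\(([^)]+)\)\[([^\]]+)\]|(\S+)', text):
        if word:  # plain word outside any annotation
            tokens.append(word.lower())
            iob.append('o')
        else:     # annotated slot span
            for i, tok in enumerate(slot_text.lower().split()):
                tokens.append(tok)
                iob.append(('b-' if i == 0 else 'i-') + domain + '.' + slot_name)
    return iob, tokens

iob, tokens = parse_iob_sketch('test', 'go from (Columbia University)[from_stop]')
print(iob)     # ['o', 'o', 'b-test.from_stop', 'i-test.from_stop']
print(tokens)  # ['go', 'from', 'columbia', 'university']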
Example #14
	def log_preds(self, test_sentences=["hello", "how are you", "what is the meaning of life"]):
		d = Dataset(self._logger)

		for s in test_sentences:
			seed = np.zeros((TRAIN_BATCH_SIZE, (MAX_OUTPUT_TOKEN_LENGTH+1)*MSG_HISTORY_LEN, 29), dtype="bool")
			blahhh=d.sample({"Msg": s})
			for i in range(len(blahhh)):
				for j in range(len(blahhh[i])):
					seed[0][i][j]=blahhh[i][j]

			self._logger.info(self.predict_sentence(seed))
Example #15
  def __init__(self, gbtdirname=None, catalogfile=None):
    """GBTFilterbankData(gbtdirname=None, catalogfile=None)
    
    Read in GBT filterbank data by reference to the catalog file.
    """

    Dataset.__init__(self, gbtdirname, "gbtfil", '')

    self.read_gbt_dir(gbtdirname, catalogfile)

    self.xlabel = 'Frequency'
    self.ylabel = 'Time'
Example #16
    def __init__(self, filename=None, name="UCI dataset"):
        """Dataset(filename="", name="") -> Dataset
    
    Creates a new Dataset based on the data in filename
    and with the name given.
    Name is used in output filename prefixes.

    This top-level class can't do anything itself,
    but needs to be instantiated with one of its subclasses.
    """

        Dataset.__init__(self, filename, name, "")
Example #17
    def test_import_dataset(self):

        ## The Pixelman dataset object.
        pds = Dataset("data/sr/0-00_mm/ASCIIxyC/")

        # The tests.

        # The number of datafiles.
        self.assertEqual(pds.getNumberOfDataFiles(), 600)

        # The data format of the folder.
        self.assertEqual(pds.getFolderFormat(), "ASCII [x, y, C]")
Example #18
    def test_import_dataset(self):

        ## The Pixelman dataset object.
        pds = Dataset("testdata/B06-W0212/2014-04-02-150255/ASCIIxyC/")

        # The tests.

        # The number of datafiles.
        self.assertEqual(pds.getNumberOfDataFiles(), 60)

        # The data format of the folder.
        self.assertEqual(pds.getFolderFormat(), "ASCII [x, y, C]")
Example #19
  def __init__(self, filename=None, name='Floatdata'):
    """Dataset(filename="", name="") -> Dataset
    
    Creates a new Dataset based on the data in filename
    and with the name given.
    Name is used in output filename prefixes.
    Assumes CSV, floating point values, and no class labels.
    Commented lines (with #) are ignored.

    This top-level class can't do anything itself,
    but needs to be instantiated with one of its subclasses.
    """

    Dataset.__init__(self, filename, name, '')
Example #20
def saveData():

		
	# Determine the application's path in order to find the dataset files there

	currentPath = os.path.dirname(os.path.realpath(__file__))

	FileUser = currentPath  + "/data/u.user"
	FileMovies = currentPath  + "/data/u.item"
	FileNotes = currentPath  + "/data/u1.base"

	dataset = Dataset(FileUser,FileMovies,FileNotes)
	dataset.vecteurNotes()
	for user in dataset.users:
		user.moyenne(dataset)
		user.normaliserVecteur()
	for i in dataset.notes:
		dataset.movies[i.idMovie-1].totalNotes = dataset.movies[i.idMovie-1].totalNotes + i.note
		dataset.movies[i.idMovie-1].nbNotes = dataset.movies[i.idMovie-1].nbNotes + 1
	for i in range(len(dataset.movies)):
		if(dataset.movies[i].nbNotes!=0):
			dataset.movies[i].avg = dataset.movies[i].totalNotes / dataset.movies[i].nbNotes


# reducedDataset is our dataset used for clustering; it contains: age, sex, and 19 movie genres
	reducedDataset = numpy.zeros((len(dataset.users),21))
	print "NB USERS : "
	print len(dataset.users)
	for i in range(len(dataset.users)):
		for j in range(20):

			if(j==0):
				reducedDataset[i,j] = float(dataset.users[i].age)/100
			if(j==1):
				if(dataset.users[i].genre == "M"):
					genre = 0
				else:
					genre = 1
				reducedDataset[i,j] = genre 
			if(j>1):
				reducedDataset[i,j] = dataset.users[i].vecteurNormalise[j-1]
	centroids, clusterAssment = kMeans(reducedDataset,10)
	with open('dataset.pkl', 'wb') as output:
		pickle.dump(dataset,output)
	with open('reducedDataset.pkl', 'wb') as output:
		pickle.dump(reducedDataset,output)
	with open('centroids.pkl', 'wb') as output:
		pickle.dump(centroids,output)
	with open('clusterAssment.pkl', 'wb') as output:
		pickle.dump(clusterAssment,output)
Example #21
class StoreObject(State):
    def __init__(self, dataset, object_id):
        State.__init__(self,
                       outcomes=['stored'],
                       input_keys=['bounding_boxes', 'clusters', 'mean',
                                   'median', 'points'])
        base = roslib.packages.get_pkg_dir(PACKAGE)
        self.dataset = Dataset(join(base, 'common', 'data'), dataset)
        self.object_id = object_id

    def execute(self, ud):
        self.dataset.store(self.object_id, ud.bounding_boxes[0].dimensions,
                           ud.points, ud.mean, ud.median)
        return 'stored'
Example #22
    def breakCubes(key, blosc_data):
      """break the cubes into smaller chunks"""
      
      key_array = [token, channel_name, res, x1, x2, y1, y2, z1, z2, time_stamp] = key.split('_')
      [res, x1, x2, y1, y2, z1, z2] = [int(i) for i in key_array[2:][:-1]]
      if blosc_data is None:
        return
      voxarray = blosc.unpack_array(blosc_data)
      
      br = BlazeRedis(token, channel_name, res)

      ds = Dataset(token)
      ch = ds.getChannelObj(channel_name)
      [zimagesz, yimagesz, ximagesz] = ds.imagesz[res]
      #[xcubedim, ycubedim, zcubedim] = cubedim = ds.cubedim[res]
      [xcubedim, ycubedim, zcubedim] = cubedim = CUBE_DIM
      [xoffset, yoffset, zoffset] = ds.offset[res]
      
      # Calculating the corner and dimension
      corner = [x1, y1, z1]
      dim = voxarray.shape[::-1][:-1]

      # Round to the nearest largest cube in all dimensions
      [xstart, ystart, zstart] = start = map(div, corner, cubedim)

      znumcubes = (corner[2]+dim[2]+zcubedim-1)/zcubedim - zstart
      ynumcubes = (corner[1]+dim[1]+ycubedim-1)/ycubedim - ystart
      xnumcubes = (corner[0]+dim[0]+xcubedim-1)/xcubedim - xstart
      numcubes = [xnumcubes, ynumcubes, znumcubes]
      offset = map(mod, corner, cubedim)

      data_buffer = np.zeros(map(mul, numcubes, cubedim)[::-1], dtype=voxarray.dtype)
      end = map(add, offset, dim)
      data_buffer[offset[2]:end[2], offset[1]:end[1], offset[0]:end[0]] = voxarray

      cube_list = []
      for z in range(znumcubes):
        for y in range(ynumcubes):
          for x in range(xnumcubes):
            zidx = XYZMorton(map(add, start, [x,y,z]))
           
            # Parameters in the cube slab
            index = map(mul, cubedim, [x,y,z])
            end = map(add, index, cubedim)

            cube_data = data_buffer[index[2]:end[2], index[1]:end[1], index[0]:end[0]]
            cube_list.append((br.generateSIKey(zidx), blosc.pack_array(cube_data.reshape((1,)+cube_data.shape))))
      
      return cube_list[:]
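
The "round to the nearest largest cube" arithmetic above is easier to see with concrete numbers; the cube dimensions and corner below are made up for illustration (not the service's real CUBE_DIM), and Python 3 integer division stands in for the Python 2 "/" used above.

# A worked, standalone example of the cube-boundary arithmetic.
cubedim = [128, 128, 16]          # x, y, z cube size (hypothetical)
corner = [100, 200, 5]            # x1, y1, z1
dim = [300, 260, 20]              # extent of the incoming voxel array

start = [c // d for c, d in zip(corner, cubedim)]                     # [0, 1, 0]
numcubes = [(c + n + d - 1) // d - s
            for c, n, d, s in zip(corner, dim, cubedim, start)]       # [4, 3, 2]
offset = [c % d for c, d in zip(corner, cubedim)]                     # [100, 72, 5]

print(start, numcubes, offset)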
Example #23
	def __init__ (self, num_players, noise):
		assert 0 <= noise <= 0.5 # Because noise = 0.5 is like a coin flip; more than 0.5 would reduce the amount of noise and reverse the ordering
		Dataset.__init__(self, num_players)
		self.order = np.arange(num_players)
		self.noise = noise
		#np.random.shuffle(self.order)

		for a in range(num_players):
			for b in range(num_players):
				a_rank = np.where(self.order == a)[0][0]
				b_rank = np.where(self.order == b)[0][0]
				if a_rank < b_rank:
					self.pairwise_probs[a,b] = 1 - noise
				else:
					self.pairwise_probs[a,b] = noise
Example #24
  def __init__(self, rawdirname=None, AODdirname=None, filename=None, force_read=False):
    """MISRData(rawdirname="", AODdirname="", filename="")
    
    Read in raw and AOD MISR data (pickled) from filename.
    If it doesn't exist, read in the full data from
    the appropriate dirnames, save it out, and proceed.
    """

    Dataset.__init__(self, filename, "misr", '')

    if (not os.path.exists(filename)) or force_read:
      MISRData.read_misr_dir(rawdirname, AODdirname, filename)
    
    # Read in the data
    self.readin()
Example #25
    def __init__ (self, num_players, noise):
        assert 0 <= noise <= 1
        Dataset.__init__(self, num_players)
        self.order = np.arange(num_players)
        self.noise = noise
        np.random.shuffle(self.order)

        for a in range(num_players):
            for b in range(num_players):
                a_rank = np.where(self.order == a)[0][0]
                b_rank = np.where(self.order == b)[0][0]
                if a_rank < b_rank:
                    self.pairwise_probs[a,b] = 1 - noise
                else:
                    self.pairwise_probs[a,b] = noise
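
The same noisy pairwise matrix can be built standalone with NumPy broadcasting; this is a sketch under the same semantics, outside the Dataset subclass: entry [a, b] is 1 - noise when player a is ranked above player b in the shuffled order, and noise otherwise.

import numpy as np

num_players, noise = 4, 0.1
order = np.arange(num_players)
np.random.shuffle(order)

rank = np.empty(num_players, dtype=int)
rank[order] = np.arange(num_players)      # rank[p] = position of player p in the order

# 1 - noise where a outranks b, noise elsewhere (including the diagonal)
pairwise_probs = np.where(rank[:, None] < rank[None, :], 1.0 - noise, noise)
print(order)
print(pairwise_probs)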
Example #26
def master(src_cfg, suffix_in, suffix_out, K, N, nr_processes, double_norm):
    D = 64

    dataset = Dataset(src_cfg, nr_clusters=K)
    samples = [str(sample) for sample in dataset.get_data('train')[0] +
               dataset.get_data('test')[0]]

    if double_norm:
        worker = double_normalization
        suffix = '.double_norm'
        gmm = load_gmm(
            os.path.join(
                dataset.FEAT_DIR + suffix_in, 'gmm',
                'gmm_%d' % K))
    else:
        worker = merge
        suffix = ''
        gmm = None

    path_in = os.path.join(
        dataset.FEAT_DIR + suffix_in,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp')
    path_out = os.path.join(
        dataset.FEAT_DIR + suffix_out,
        'statistics_k_%d' % dataset.VOC_SIZE, 'stats.tmp' + suffix)

    sstats_in = SstatsMap(path_in)
    sstats_out = SstatsMap(path_out)

    len_sstats = dataset.VOC_SIZE + 2 * D * dataset.VOC_SIZE

    kwargs = {
        'N': N,
        'sstats_in': sstats_in,
        'sstats_out': sstats_out,
        'len_sstats': len_sstats,
        'gmm': gmm}

    if nr_processes > 1:
        nr_samples_per_process = len(samples) / nr_processes + 1
        for ii in xrange(nr_processes):
            mp.Process(target=worker,
                       args=(samples[
                           ii * nr_samples_per_process:
                           (ii + 1) * nr_samples_per_process], ),
                       kwargs=kwargs).start()
    else:
        worker(samples, **kwargs)
Example #27
def generate_dataset(items, slots, voca: Vocabulary):
    dataset = Dataset()
    for item in items:
        vectors = []
        for word in item[0].split():
            vectors.append(voca.get(word))

        labels = []
        for tag in item[1].split():
            value = np.zeros([len(slots)], dtype=np.float32)
            value[slots.index(tag)] = 1
            labels.append(value)

        dataset.add(item[0], item[1], vectors, labels)

    return dataset
Example #28
    def test_from_datasets(self):
        for testset in Dataset.import_from_path(self.QUERIES_PATH):
            print('Testing query: %s' % testset['tried_query'])
            try:
                self.assertMatch(testset['tried_query'], testset, center=testset.get('tried_location', None))
            except AssertionError as e:
                print("### FAIL: %s" % e)
Example #29
class TestGradients(unittest.TestCase):
    def setUp(self):
        length = 100

        with open("tests/fixtures.pkl", "rb") as pkl:
            self.prices_jnj = cPickle.load(pkl)[:length]
            self.prices_apl = cPickle.load(pkl)[:length]

        self.data = Dataset(self.prices_jnj, [self.prices_apl])
        self.trX, self.trY, _, _ = self.data.build(0, 75, 5, 50)

    def test_linear_mean_return_model(self):
        model = Linear(delta=0.1, lmb=1.0)

        for i in range(10):
            diff = check_grad(model.cost, model.grad, model.weights(self.trX, i), self.trX, self.trY)

            self.assertTrue(diff < 1.0e-5, diff)

    def test_nonlinear_mean_return_model(self):
        model = Nonlinear(delta=0.1, lmb=1.0, hidden=7)

        for i in range(10):
            diff = check_grad(model.cost, model.grad, model.weights(self.trX, i), self.trX, self.trY)

            self.assertTrue(diff < 1.0e-5, diff)
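
scipy.optimize.check_grad, used in both tests above, compares an analytic gradient with a finite-difference estimate and returns the norm of their difference; a minimal self-contained illustration:

import numpy as np
from scipy.optimize import check_grad

def cost(w):
    return np.sum(w ** 2)

def grad(w):
    return 2 * w

w0 = np.random.randn(5)
print(check_grad(cost, grad, w0))  # close to zero when the analytic gradient is correct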
Example #30
def get_model(model_path):
    hyperparams = utils.load_dict_from_json_file(os.path.join(model_path, "hyperparams"))
    hyperparams['weights_initialization'] = "Zeros"

    trainingparams = utils.load_dict_from_json_file(os.path.join(model_path, "trainingparams"))
    dataset_name = trainingparams['dataset_name']

    if dataset_name != "binarized_mnist":
        raise ValueError("Invalid dataset. Only model trained on MNIST supported.")

    #
    # LOAD DATASET ####
    dataset = Dataset.get(dataset_name)
    if trainingparams['batch_size'] == -1:
        trainingparams['batch_size'] = dataset['train']['length']

    model = build_model(dataset, trainingparams, hyperparams, hyperparams['hidden_sizes'])

    print("### Loading model | Hidden:{0} CondMask:{1} Activation:{2} ... ".format(
        hyperparams['hidden_sizes'], hyperparams['use_cond_mask'], hyperparams['hidden_activation']), end=' ')
    start_time = t.time()
    load_model_params(model, model_path)
    print(utils.get_done_text(start_time), "###")

    return model, dataset_name, dataset
Example #31
                     smooth_known_acc*100, smooth_abs_acc*100))
            if b > 1 and b % 100 == 0:
                print("saved!")
                model.save('./save/save.h5')


if __name__ == '__main__':
    args = parse_arguments()
    vocabs = []
    vocabs_file = os.path.join('./keywords', args.project)
    with open(vocabs_file) as fp:
        for line in fp:
            kw = re.sub(' [0-9]*$', '', line.strip())
            vocabs.append(kw)
    data_dir = os.path.join('./data', args.project)
    files = search_files([data_dir], args.suffixes)
    print("found %s files" % len(files))
    filetokens = []
    for i, name in enumerate(files):
        if i % 1000 == 0:
            print("%s files processed" % i)
        filetokens.append((name, tokenize(name)))

    dataset = Dataset(vocabs, args.win)
    model = LSTMModel(vocabs, args.dim, args.win)
    # model.load("./save/save.h5")
    train()
    for X, Y in dataset.next_batch(filetokens, 10):
        print("prediction: ", np.argmax(model.predict(X), axis=1),
              "correct: ", Y)
Example #32
                      type='int',
                      default=7)

    (options, args) = parser.parse_args(sys.argv)

    dataset = options.dataset
    path = options.path
    load_data = options.load_data
    feature_extract = options.feature_extract
    nb_classes = options.nb_classes

    globalvars.dataset = dataset
    globalvars.nb_classes = nb_classes

    if load_data:
        ds = Dataset(path=path, dataset=dataset)

        print("Writing " + dataset + " data set to file...")
        pickle.dump(ds, open(dataset + '_db.p', 'wb'))
    else:
        print("Loading data from " + dataset + " data set...")
        ds = pickle.load(open(dataset + '_db.p', 'rb'))

    if feature_extract:
        extract_dataset(ds.data, nb_samples=len(ds.targets), dataset=dataset)

    try:
        trials = Trials()
        best_run, best_model = optim.minimize(model=create_model,
                                              data=get_data,
                                              algo=tpe.suggest,
Example #33
from dataset import Dataset

d = Dataset("../cnn/questions/smallTest/numbered/", 3, 4000, 2000)
print d.batches

for i in range(10):
    x, y = d.next_batch()
    print x
    print y
Example #34
import warnings
warnings.simplefilter('ignore')
from dataset import Dataset

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path',
                        type=str,
                        required=True,
                        help='path to the config_file')
    args = parser.parse_args()
    config_path = args.config_path

    with open(config_path, 'r') as f:
        config = json.load(f)
    SEED = config['seed']
    # we load the dataset and perform edge extraction to get the train test split
    print('loading dataset ...')
    dataset = Dataset(**config["dataset"])
    print('embedding network ...')
    dataset.embed_network(**config['embedding'])

    # we do the classification
    clf = LogisticRegression(random_state=SEED)

    dataset.embed_edges()
    clf.fit(dataset.x_train, dataset.y_train)
    test_pred_prob = clf.predict_proba(dataset.x_test)[:, 1]
    print('auc score on test set', roc_auc_score(dataset.y_test,
                                                 test_pred_prob))
Example #35
from modelt import bulid_model
from dataset import Dataset, get_filenames
model = bulid_model()
model.summary()
folder = '/home/zsh/Fast_scnn/'
train_images, train_annotations, val_images, val_annotations = get_filenames(
    folder)
batch_size = 2
val_dataset = Dataset(image_size=[2048, 1024],
                      image_filenames=val_images,
                      annotation_filenames=val_annotations,
                      num_classes=21,
                      batch_size=batch_size)
try:
    model.load_weights('weights/weights-037-0.85.h5')
    print('successful load weights {}'.format('weights-037-0.85.h5'))
except Exception as e:
    print('Error {}'.format(e))
for i in range(10):
    f = model.evaluate_generator(val_dataset, steps=10 * (i + 1))
    print('f is {}'.format(f))
Example #36
def train_topic(st, tt):

    device = torch.device("cuda:1")

    # with open('train_sem_mask.pickle', 'rb') as f:
    #     train_dataeval_mask_set = pickle.load(f)

    # with open('test_sem_mask.pickle', 'rb') as f:
    #     test_dataeval_mask_set = pickle.load(f)

    # with open('train_sem.pickle', 'rb') as f:
    #     train_dataeval_set = pickle.load(f)

    # with open('test_sem.pickle', 'rb') as f:
    #     test_dataeval_set = pickle.load(f)

    # with open('framenet.pickle', 'rb') as f:
    #     test_framenet_set = pickle.load(f)

    # with open('framenet_mask.pickle', 'rb') as f:
    #     test_framenet_mask_set = pickle.load(f)

    # with open('data_seen.pickle', 'rb') as f:
    #     data = pickle.load(f)
    # train_set, test_set = data['train'], data['test']

    # with open('data_seen_mask.pickle', 'rb') as f:
    #     data = pickle.load(f)
    # train_set_mask, test_set_mask = data['train'], data['test']

    ### Reading data...
    with open('data.pickle', 'rb') as f:
        data = pickle.load(f)
    # train_set, test_set = split_train_test(data)
    train_set = get_topic(data, st)
    test_set = get_topic(data, tt)

    with open('data_mask.pickle', 'rb') as f:
        data_mask = pickle.load(f)
    #train_set_mask, test_set_mask = split_train_test(data)
    train_set_mask = get_topic(data_mask, st)
    test_set_mask = get_topic(data_mask, tt)

    test_set, test_set_mask = test_set, test_set_mask

    train_pair = list(zip(train_set, train_set_mask))
    train_pair = negative_sampling(train_pair, 0.8)
    train_set, train_set_mask = [d[0] for d in train_pair
                                 ], [d[1] for d in train_pair]

    ###
    test_dataset = Dataset(10, test_set)
    test_dataset_mask = Dataset(10, test_set_mask)

    test_dataset_batch = [
        batch for batch in test_dataset.reader(device, False)
    ]
    test_dataset_mask_batch = [
        batch for batch in test_dataset_mask.reader(device, False)
    ]

    test_dataset_mix = list(zip(test_dataset_batch, test_dataset_mask_batch))

    ###
    train_dataset = Dataset(20, train_set)
    train_dataset_mask = Dataset(20, train_set_mask)

    train_dataset_batch = [
        batch for batch in train_dataset.reader(device, False)
    ]
    train_dataset_mask_batch = [
        batch for batch in train_dataset_mask.reader(device, False)
    ]

    train_dataset_mix = list(zip(train_dataset_batch,
                                 train_dataset_mask_batch))

    model = BertCausalModel(3).to(device)
    model_mask = BertCausalModel(3).to(device)

    learning_rate = 1e-5
    optimizer = BertAdam(model.parameters(), lr=learning_rate)
    optimizer_mask = BertAdam(model_mask.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')

    for _ in range(0, 20):
        idx = 0
        for batch, batch_mask in tqdm(train_dataset_mix,
                                      mininterval=2,
                                      total=len(train_dataset_mix),
                                      file=sys.stdout,
                                      ncols=80):
            idx += 1
            model.train()
            model_mask.train()
            sentences_s, mask_s, sentences_t, mask_t, event1, event1_mask, event2, event2_mask, data_y, _ = batch
            sentences_s_mask = batch_mask[0]

            opt = model.forward_logits(sentences_s, mask_s, sentences_t,
                                       mask_t, event1, event1_mask, event2,
                                       event2_mask)
            opt_mask = model_mask.forward_logits(sentences_s_mask, mask_s,
                                                 sentences_t, mask_t, event1,
                                                 event1_mask, event2,
                                                 event2_mask)

            opt_mix = torch.cat([opt, opt_mask], dim=-1)
            logits = model.additional_fc(opt_mix)
            loss = loss_fn(logits, data_y)

            optimizer.zero_grad()
            optimizer_mask.zero_grad()
            loss.backward()
            optimizer.step()
            optimizer_mask.step()

        model.eval()
        model_mask.eval()
        with torch.no_grad():
            predicted_all = []
            gold_all = []
            for batch, batch_mask in test_dataset_mix:
                sentences_s, mask_s, sentences_t, mask_t, event1, event1_mask, event2, event2_mask, data_y, _ = batch
                sentences_s_mask = batch_mask[0]

                opt = model.forward_logits(sentences_s, mask_s, sentences_t,
                                           mask_t, event1, event1_mask, event2,
                                           event2_mask)
                opt_mask = model_mask.forward_logits(sentences_s_mask, mask_s,
                                                     sentences_t, mask_t,
                                                     event1, event1_mask,
                                                     event2, event2_mask)

                opt_mix = torch.cat([opt, opt_mask], dim=-1)
                logits = model.additional_fc(opt_mix)

                predicted = torch.argmax(logits, -1)
                predicted = list(predicted.cpu().numpy())
                predicted_all += predicted

                gold = list(data_y.cpu().numpy())
                gold_all += gold
            p, r, f = compute_f1(gold_all, predicted_all)
            print(p, r, f)
            print('Here')
Example #37
    def __init__(self,
                 cv,
                 network,
                 unet,
                 network_parameters,
                 learning_rate,
                 output_folder_name=''):
        """
        Initializer.
        :param cv: The cv fold. 0, 1, 2 for CV; 'train_all' for training on whole dataset.
        :param network: The used network. Usually network_u.
        :param unet: The specific instance of the U-Net. Usually UnetClassicAvgLinear3d.
        :param network_parameters: The network parameters passed to unet.
        :param learning_rate: The initial learning rate.
        :param output_folder_name: The output folder name that is used for distinguishing experiments.
        """
        super().__init__()
        self.batch_size = 1
        self.learning_rates = [
            learning_rate, learning_rate * 0.5, learning_rate * 0.1
        ]
        self.learning_rate_boundaries = [50000, 75000]
        self.max_iter = 100000
        self.test_iter = 10000
        self.disp_iter = 100
        self.snapshot_iter = 5000
        self.test_initialization = False
        self.current_iter = 0
        self.reg_constant = 0.0005
        self.use_background = True
        self.num_landmarks = 25
        self.heatmap_sigma = 4.0
        self.learnable_sigma = True
        self.data_format = 'channels_first'
        self.network = network
        self.unet = unet
        self.network_parameters = network_parameters
        self.padding = 'same'
        self.clip_gradient_global_norm = 100000.0

        self.use_pyro_dataset = False
        self.use_spine_postprocessing = True
        self.save_output_images = True
        self.save_output_images_as_uint = True  # set to False, if you want to see the direct network output
        self.save_debug_images = False
        self.has_validation_groundtruth = cv in [0, 1, 2]
        self.local_base_folder = '../verse2019_dataset'
        self.image_size = [96, 96, 128]
        self.image_spacing = [2] * 3
        self.cropped_inc = [0, 96, 0, 0]
        self.heatmap_size = self.image_size
        self.sigma_regularization = 100
        self.sigma_scale = 1000.0
        self.cropped_training = True
        self.output_folder = os.path.join('./output/vertebrae_localization/',
                                          network.__name__, unet.__name__,
                                          output_folder_name, str(cv),
                                          self.output_folder_timestamp())
        dataset_parameters = {
            'base_folder': self.local_base_folder,
            'image_size': self.image_size,
            'image_spacing': self.image_spacing,
            'cv': cv,
            'input_gaussian_sigma': 0.75,
            'generate_landmarks': True,
            'generate_landmark_mask': True,
            'translate_to_center_landmarks': True,
            'translate_by_random_factor': True,
            'save_debug_images': self.save_debug_images
        }

        dataset = Dataset(**dataset_parameters)
        if self.use_pyro_dataset:
            server_name = '@localhost:51232'
            uri = 'PYRO:verse_dataset' + server_name
            print('using pyro uri', uri)
            self.dataset_train = PyroClientDataset(uri, **dataset_parameters)
        else:
            self.dataset_train = dataset.dataset_train()
        self.dataset_val = dataset.dataset_val()

        self.point_statistics_names = [
            'pe_mean', 'pe_stdev', 'pe_median', 'num_correct'
        ]
        self.additional_summaries_placeholders_val = dict([
            (name, create_summary_placeholder(name))
            for name in self.point_statistics_names
        ])
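
A small standalone sketch of the piecewise-constant schedule implied by learning_rates and learning_rate_boundaries above; the "switch at each boundary" semantics is an assumption about how the training framework consumes these fields.

def lr_for_iteration(iteration, learning_rates, boundaries):
    # return the rate for the current iteration; switch at each boundary
    for boundary, lr in zip(boundaries, learning_rates):
        if iteration < boundary:
            return lr
    return learning_rates[-1]

learning_rate = 1e-4
learning_rates = [learning_rate, learning_rate * 0.5, learning_rate * 0.1]
learning_rate_boundaries = [50000, 75000]
for it in (0, 60000, 90000):
    print(it, lr_for_iteration(it, learning_rates, learning_rate_boundaries))
# 0 -> 1e-4, 60000 -> 5e-05, 90000 -> 1e-05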
Example #38
from bracket_generator import BracketGenerator
from dataset import Dataset
import tensorflow as tf

if __name__ == '__main__':
    gen = BracketGenerator(min_len=5, max_len=10)
    # next_batch = tf.data.Dataset.from_generator(
    #     gen,
    #     (tf.string, tf.int64),
    #     (tf.TensorShape([]), tf.TensorShape([3]))
    # ).batch(32).make_one_shot_iterator().get_next()

    # for i, x in enumerate(gen()):
    #     print(x)
        
    #     if i > 10:
    #         break


    # model = RNNModel()

    dataset = Dataset(max_len=10, min_len=1)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        x = sess.run(dataset.next)
        print(x)

Example #39
def main():

    train_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/train/train/*/Images/*.png',
            recursive=True))
    train_mask_list = sorted(
        glob.glob(pathname='./trainlabels/*/TrainId/*.png', recursive=True))
    valid_image_list = sorted(
        glob.glob(
            pathname=
            '../input/uavid-semantic-segmentation-dataset/valid/valid/*/Images/*.png',
            recursive=True))
    valid_mask_list = sorted(
        glob.glob(pathname='./validlabels/*/TrainId/*.png', recursive=True))

    preprocessing_fn = smp.encoders.get_preprocessing_fn(
        config.ENCODER, config.ENCODER_WEIGHTS)

    train_dataset = Dataset(
        train_image_list,
        train_mask_list,
        augmentation=augmentations.get_training_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    valid_dataset = Dataset(
        valid_image_list,
        valid_mask_list,
        augmentation=augmentations.get_validation_augmentation(),
        preprocessing=augmentations.get_preprocessing(preprocessing_fn),
        classes=config.CLASSES,
    )

    train_loader = DataLoader(train_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config.BATCH_SIZE,
                              shuffle=False,
                              num_workers=2,
                              pin_memory=True,
                              drop_last=False)

    loaders = {"train": train_loader, "valid": valid_loader}

    base_optimizer = RAdam([
        {
            'params': model.MODEL.decoder.parameters(),
            'lr': config.LEARNING_RATE
        },
        {
            'params': model.MODEL.encoder.parameters(),
            'lr': 1e-4
        },
        {
            'params': model.MODEL.segmentation_head.parameters(),
            'lr': config.LEARNING_RATE
        },
    ])
    optimizer = Lookahead(base_optimizer)
    criterion = BCEDiceLoss(activation=None)
    runner = SupervisedRunner()
    scheduler = OneCycleLRWithWarmup(optimizer,
                                     num_steps=config.NUM_EPOCHS,
                                     lr_range=(0.0016, 0.0000001),
                                     init_lr=config.LEARNING_RATE,
                                     warmup_steps=2)

    callbacks = [
        IouCallback(activation='none'),
        ClasswiseIouCallback(classes=config.CLASSES, activation='none'),
        EarlyStoppingCallback(patience=config.ES_PATIENCE,
                              metric='iou',
                              minimize=False),
    ]
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        loaders=loaders,
        callbacks=callbacks,
        logdir=config.LOGDIR,
        num_epochs=config.NUM_EPOCHS,
        # save our best checkpoint by IoU metric
        main_metric="iou",
        # IoU needs to be maximized.
        minimize_metric=False,
        # for FP16. It uses the variable from the very first cell
        fp16=config.FP16_PARAMS,
        # prints train logs
        verbose=True,
    )
Example #40
def train(opts):
    # Define environment
    set_gpus(opts.gpu)
    device = torch.device("cuda")

    # Other params
    batch_size: int = 32
    latent_dimension: int = 1
    validation_size: int = 36

    os.makedirs(opts.output_path, exist_ok=True)

    # Define models
    generator = Generator(latent_dimension).to(device, non_blocking=True)
    discriminator = Discriminator().to(device, non_blocking=True)

    # Define train data loader
    max_iterations: int = 200000
    dataset = Dataset(max_iterations * batch_size)
    train_dataloader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   pin_memory=True)
    val_dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=validation_size,
                                                 shuffle=False,
                                                 pin_memory=True)

    # Define optimizers
    optimizer_g = torch.optim.Adam(generator.parameters(),
                                   lr=0.0001,
                                   betas=(0.5, 0.99))
    optimizer_d = torch.optim.Adam(discriminator.parameters(),
                                   lr=0.0001,
                                   betas=(0.5, 0.99))

    criterion = torch.nn.functional.binary_cross_entropy_with_logits

    # Define validation params
    z_validation = torch.randn(validation_size,
                               latent_dimension,
                               1,
                               1,
                               device=device)

    # Export some real images
    real_sample_images = to_rgb(next(iter(val_dataloader)))
    real_sample_grid = image_grid(real_sample_images)
    real_sample_grid.save(os.path.join(opts.output_path, f"real.png"))

    # Train loop
    for iteration, images in enumerate(train_dataloader):
        # Move data to gpu
        images = images.to(device, non_blocking=True)

        # Define targets
        fake_target = torch.zeros(batch_size, 1, 1, 1, device=device)
        real_target = torch.ones(batch_size, 1, 1, 1, device=device)

        # Train generator
        # sample z
        z = torch.randn(batch_size, latent_dimension, 1, 1, device=device)
        # get G(z): pass z through generator --> get prediction
        fake_sample = generator(z)
        # pass G(z) through discriminator
        fake_prediction = discriminator(fake_sample)
        # compute fake loss
        loss_generator = criterion(fake_prediction, real_target)

        # backprop through generator
        optimizer_g.zero_grad()
        loss_generator.backward()
        optimizer_g.step()

        # Train discriminator
        # pass real data through discriminator
        real_prediction = discriminator(images)
        # pass G(z).detach() through discriminator
        fake_prediction = discriminator(fake_sample.detach())

        # compute real loss
        loss_real = criterion(real_prediction, real_target)

        # compute fake loss
        loss_fake = criterion(fake_prediction, fake_target)
        loss_discriminator = (loss_real + loss_fake) / 2

        # backprop through discriminator
        optimizer_d.zero_grad()
        loss_discriminator.backward()
        optimizer_d.step()

        if iteration % opts.log_frequency == opts.log_frequency - 1:
            log_fragments = [
                f"{iteration + 1:>5}:",
                f"Loss(G): {loss_generator.item():>5.4f}",
                f"Loss(D): {loss_discriminator.item():>5.4f}",
                f"Real Pred.: {torch.sigmoid(real_prediction).mean().item():>5.4f}",
                f"Fake Pred.: {torch.sigmoid(fake_prediction).mean().item():>5.4f}",
            ]
            print(*log_fragments, sep="\t")

        # Validation
        if iteration % opts.validation_frequency == opts.validation_frequency - 1:
            with torch.no_grad():
                generator.eval()
                val_samples = generator(z_validation).to("cpu")
                generator.train()

            # output image
            val_grid_path = os.path.join(opts.output_path,
                                         f"{iteration+1:05d}.png")
            val_grid = image_grid(to_rgb(val_samples))
            val_grid.save(val_grid_path)
Example #41
gpu_id = 0
train_model = True
result_path = os.path.join('DAVIS', 'Results', 'Segmentations', '480p', 'OSVOS', seq_name)

# Train parameters
parent_path = os.path.join('models', 'OSVOS_parent', 'OSVOS_parent.ckpt-50000')
logs_path = os.path.join('models', seq_name)
max_training_iters = 2000

# Define Dataset
test_frames = sorted(os.listdir(os.path.join('DAVIS', 'JPEGImages', '480p', seq_name)))
test_imgs = [os.path.join('DAVIS', 'JPEGImages', '480p', seq_name, frame) for frame in test_frames]
if train_model:
    train_imgs = [os.path.join('DAVIS', 'JPEGImages', '480p', seq_name, '00000.jpg')+' '+
                  os.path.join('DAVIS', 'Annotations', '480p', seq_name, '00000.png')]
    dataset = Dataset(train_imgs, test_imgs, './', data_aug=True)
else:
    dataset = Dataset(None, test_imgs, './')

# Train the network
if train_model:
    #More training parameters
    learning_rate = 1e-8
    save_step = max_training_iters
    side_supervision = 3
    display_step = 10
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(gpu_id)):
            global_step = tf.Variable(0, name='global_step', trainable=False)
            osvos.train_finetune(dataset, parent_path, side_supervision, learning_rate, logs_path, max_training_iters,
                                 save_step, display_step, global_step, iter_mean_grad=1, ckpt_name=seq_name)
Example #42
    # generate synthetic data
    obj_list = []
    for i in range(200):
        x = generate_data_square(nb_data=25, std=0.01)
        obj_list.append(x)
        os.makedirs('models' + save_fold + '/object', exist_ok=True)
        np.save('models' + save_fold + '/object' + '/square_{}.npy'.format(i),
                x)
        # x = generate_data(nb_data=100, noise=0.01)
        # obj_list.append(x)
        # np.save('models' + save_fold + '/object' + '/circle_{}.npy'.format(i), x)

    data = np.stack(obj_list, axis=0)
    print("object num:", data.shape[0], "sample num", data.shape[1])
    dataset = Dataset(data, knn=10)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=32,
                                              shuffle=True)

    net = build_network(input_dim=2, c_dim=c_dim)
    net.to(device)

    optimizer = optim.Adam(net.parameters())

    for epoch in range(1000):
        rec_err, eiko_err = train(net, data_loader, optimizer, device)
        print('epoch', epoch, 'rec_err:', rec_err, 'eiko_err:', eiko_err)
        # if epoch % 100 == 0:
        #     torch.save(net.state_dict(), 'models' + save_fold + '/model_{0:04d}.pth'.format(epoch))
Example #43
from pretrain import PreTrainer
import random


if __name__=='__main__':


    random.seed(901)

    dataset_config = {'feature_file': './data/pubmed/features.txt',
                      'graph_file': './data/pubmed/edges.txt',
                      'walks_rel': './data/pubmed/walks_104010.txt',
                      'label_file': './data/pubmed/group.txt',
                      'walks_att': './data/pubmed/walks_6108.txt',
                      'similar_rate': 3}
    graph = Dataset(dataset_config)


    config = {
        'emb': './emb/pubmed.npy',
        
        'rel_shape': [500, 100],
        'att_shape': [200, 100],
        'rel_input_dim': graph.num_nodes,
        'att_input_dim': graph.num_feas,
        'is_init': True,
        'pretrain_params_path': './Log/pubmed/pretrain_params.pkl',
        
        'drop_prob': 0.2,
        'learning_rate': 1e-4,
        'batch_size': 50,
Example #44
        rec = cum_tp / np.maximum(tot_gt_relations, np.finfo(np.float32).eps)
        rec_at_n[nre] = rec[-1]
    # calculate mean precision for tagging
    mprec_at_n = dict()
    for nre in tag_nreturns:
        mprec_at_n[nre] = np.mean(prec_at_n[nre])
    return mAP, rec_at_n, mprec_at_n


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Video vid_features relation evaluation.')
    parser.add_argument('prediction_file', type=str, help='Prediction json file')
    args = parser.parse_args()
    with open(args.prediction_file, 'r') as fin:
        prediction_json = json.load(fin)
    dataset = Dataset()
    # evaluate
    groundtruth = dict()
    for vid in dataset.get_index('test'):
        groundtruth[vid] = dataset.get_relation_insts(vid)
    mAP, rec_at_n, mprec_at_n = eval_visual_relation(groundtruth, prediction_json)
    print('detection mAP: {}'.format(mAP))
    print('detection recall@50: {}'.format(rec_at_n[50]))
    print('detection recall@100: {}'.format(rec_at_n[100]))
    print('tagging precision@1: {}'.format(mprec_at_n[1]))
    print('tagging precision@5: {}'.format(mprec_at_n[5]))
    print('tagging precision@10: {}'.format(mprec_at_n[10]))
    # evaluate in zero-shot setting
    print('-----zero-shot------')
    zeroshot_triplets = dataset.get_triplets('test').difference(
        dataset.get_triplets('train'))
Example #45
            '2010.01.07.winter.weather', '2010.03.02.japan.unemployment.ft',
            '2010.01.18.uk.israel.livni', '2010.01.12.uk.islamist.group.ban'
        ]

    train_set_mask = list(filter(lambda x: not x[0] in test_set_file, data))
    test_set_mask = list(filter(lambda x: x[0] in test_set_file, data))

    print('Train', len(train_set), 'Test', len(test_set))

    train_pair = list(zip(train_set, train_set_mask))
    train_pair = negative_sampling(train_pair)
    train_set, train_set_mask = [d[0] for d in train_pair
                                 ], [d[1] for d in train_pair]

    ###
    test_dataset = Dataset(10, test_set)
    test_dataset_mask = Dataset(10, test_set_mask)

    test_dataset_batch = [
        batch for batch in test_dataset.reader(device, False)
    ]
    test_dataset_mask_batch = [
        batch for batch in test_dataset_mask.reader(device, False)
    ]

    test_dataset_mix = list(zip(test_dataset_batch, test_dataset_mask_batch))

    ###
    train_dataset = Dataset(20, train_set)
    train_dataset_mask = Dataset(20, train_set_mask)
Example #46
print("*********************************")
print("Default FC-GAGA parameters:")
print(hyperparams_dict)
print("*********************************")

hyperparams_dict["dataset"] = 'metr-la'
hyperparams_dict["horizon"] = 12
hyperparams_dict["history_length"] = 12

print("*********************************")
print("LOADING DATA")
print("*********************************")

dataset = Dataset(name=hyperparams_dict["dataset"],
                  horizon=hyperparams_dict["horizon"],
                  history_length=hyperparams_dict["history_length"],
                  path=DATADIR)

hyperparams_dict["num_nodes"] = dataset.num_nodes
hyperparams = Parameters(**hyperparams_dict)

print("*********************************")
print("TRAINING MODELS")
print("*********************************")

trainer = Trainer(hyperparams=hyperparams, logdir=LOGDIR)
trainer.fit(dataset=dataset)

print("*********************************")
print("COMPUTING METRICS")
print("*********************************")
Example #47
import numpy as np
import numpy.random as npr

from dataset import Dataset
from pipeline import BasePipeline


def build_toy_dataset(n_data=40, noise_std=0.1):
    D = 1
    rs = npr.RandomState(0)
    inputs = np.concatenate(
        [np.linspace(0, 2, num=n_data // 2),
         np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 4.0) / 4.0
    inputs = inputs.reshape((len(inputs), D))
    targets = targets.reshape((len(targets), D))
    return inputs, targets


X, y = build_toy_dataset(300, noise_std=0)
n_train = 200
n_test = 100
train = Dataset(X[:n_train], y[:n_train])
test = Dataset(X[n_train:], y[n_train:])

pipe = BasePipeline(train, X.shape[1], 1, test=test, verbose=True)
pipe.fit()
Example #48
class GeneratingFontDesignGAN():
    """Generating font design GAN

    This class is only for generating fonts.
    """
    def __init__(self):
        global FLAGS
        self._setup_dirs()
        self._setup_params()
        self._setup_embedding_chars()
        if FLAGS.generate_walk:
            self.batch_size = FLAGS.batch_size
            while ((FLAGS.char_img_n * self.char_embedding_n) % self.batch_size
                   != 0) or (self.batch_size % self.char_embedding_n != 0):
                self.batch_size -= 1
            print('batch_size: {}'.format(self.batch_size))
            if FLAGS.generate_walk:
                self.walk_step = self.batch_size // self.char_embedding_n
                print('walk_step: {}'.format(self.walk_step))
            self._load_dataset()
        else:
            self._setup_inputs()
        self._prepare_generating()

    def _setup_dirs(self):
        """Setup output directories

        If the destinations do not exist, make directories like this:
            FLAGS.gan_dir
            ├ generated
            └ random_walking
        """
        self.src_log = os.path.join(FLAGS.gan_dir, 'log')
        self.dst_generated = os.path.join(FLAGS.gan_dir, 'generated')
        if not os.path.exists(self.dst_generated):
            os.mkdir(self.dst_generated)
        if FLAGS.generate_walk:
            self.dst_walk = os.path.join(FLAGS.gan_dir, 'random_walking')
            if not os.path.exists(self.dst_walk):
                os.makedirs(self.dst_walk)

    def _setup_params(self):
        """Setup paramaters

        To set up the GAN, read the JSON file and set its entries as attributes (self.~).
        JSON file's path is "FLAGS.gan_dir/log/flags.json".
        """
        with open(os.path.join(self.src_log, 'flags.json'), 'r') as json_file:
            json_dict = json.load(json_file)
        keys = [
            'chars_type', 'img_width', 'img_height', 'img_dim', 'style_z_size',
            'font_h5', 'style_ids_n', 'arch'
        ]
        for key in keys:
            setattr(self, key, json_dict[key])

    def _setup_embedding_chars(self):
        """Setup embedding characters

        Setup generating characters, like alphabets or hiragana.
        """
        self.embedding_chars = set_chars_type(self.chars_type)
        assert self.embedding_chars != [], 'embedding_chars is empty'
        self.char_embedding_n = len(self.embedding_chars)

    def _setup_inputs(self):
        """Setup inputs

        Setup generating inputs, batchsize and others.
        """
        assert os.path.exists(FLAGS.ids), '{} is not found'.format(FLAGS.ids)
        with open(FLAGS.ids, 'r') as json_file:
            json_dict = json.load(json_file)
        self.style_gen_ids_x, self.style_gen_ids_y, self.style_gen_ids_alpha = construct_ids(
            json_dict['style_ids'])
        self.char_gen_ids_x, self.char_gen_ids_y, self.char_gen_ids_alpha = construct_ids(
            json_dict['char_ids'])
        assert self.style_gen_ids_x.shape[0] == self.char_gen_ids_x.shape[0], \
            'style_ids.shape is not equal char_ids.shape'
        self.batch_size = self.style_gen_ids_x.shape[0]
        self.col_n = json_dict['col_n']
        self.row_n = math.ceil(self.batch_size / self.col_n)

    def _load_dataset(self):
        """Load dataset

        Setup dataset.
        """
        self.real_dataset = Dataset(self.font_h5, 'r', self.img_width,
                                    self.img_height, self.img_dim)
        self.real_dataset.set_load_data()

    def _prepare_generating(self):
        """Prepare generating

        Make tensorflow's graph.
        """
        self.z_size = self.style_z_size + self.char_embedding_n

        if self.arch == 'DCGAN':
            generator = GeneratorDCGAN(img_size=(self.img_width,
                                                 self.img_height),
                                       img_dim=self.img_dim,
                                       z_size=self.z_size,
                                       layer_n=4,
                                       k_size=3,
                                       smallest_hidden_unit_n=64,
                                       is_bn=False)
        elif self.arch == 'ResNet':
            generator = GeneratorResNet(k_size=3, smallest_unit_n=64)

        if FLAGS.generate_walk:
            style_embedding_np = np.random.uniform(
                -1, 1, (FLAGS.char_img_n // self.walk_step,
                        self.style_z_size)).astype(np.float32)
        else:
            style_embedding_np = np.random.uniform(
                -1, 1,
                (self.style_ids_n, self.style_z_size)).astype(np.float32)

        with tf.variable_scope('embeddings'):
            style_embedding = tf.Variable(style_embedding_np,
                                          name='style_embedding')
        self.style_ids_x = tf.placeholder(tf.int32, (self.batch_size, ),
                                          name='style_ids_x')
        self.style_ids_y = tf.placeholder(tf.int32, (self.batch_size, ),
                                          name='style_ids_y')
        self.style_ids_alpha = tf.placeholder(tf.float32, (self.batch_size, ),
                                              name='style_ids_alpha')
        self.char_ids_x = tf.placeholder(tf.int32, (self.batch_size, ),
                                         name='char_ids_x')
        self.char_ids_y = tf.placeholder(tf.int32, (self.batch_size, ),
                                         name='char_ids_y')
        self.char_ids_alpha = tf.placeholder(tf.float32, (self.batch_size, ),
                                             name='char_ids_alpha')

        # If the sum of the style/char ids is negative, z is sampled from a uniform distribution
        style_z_x = tf.cond(
            tf.less(tf.reduce_sum(self.style_ids_x),
                    0), lambda: tf.random_uniform(
                        (self.batch_size, self.style_z_size), -1, 1),
            lambda: tf.nn.embedding_lookup(style_embedding, self.style_ids_x))
        style_z_y = tf.cond(
            tf.less(tf.reduce_sum(self.style_ids_y),
                    0), lambda: tf.random_uniform(
                        (self.batch_size, self.style_z_size), -1, 1),
            lambda: tf.nn.embedding_lookup(style_embedding, self.style_ids_y))
        style_z = style_z_x * tf.expand_dims(1. - self.style_ids_alpha, 1) \
            + style_z_y * tf.expand_dims(self.style_ids_alpha, 1)

        char_z_x = tf.one_hot(self.char_ids_x, self.char_embedding_n)
        char_z_y = tf.one_hot(self.char_ids_y, self.char_embedding_n)
        char_z = char_z_x * tf.expand_dims(1. - self.char_ids_alpha, 1) \
            + char_z_y * tf.expand_dims(self.char_ids_alpha, 1)

        z = tf.concat([style_z, char_z], axis=1)

        self.generated_imgs = generator(z, is_train=False)

        if FLAGS.gpu_ids == "":
            sess_config = tf.ConfigProto(device_count={"GPU": 0},
                                         log_device_placement=True)
        else:
            sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
                visible_device_list=FLAGS.gpu_ids))
        self.sess = tf.Session(config=sess_config)
        self.sess.run(tf.global_variables_initializer())

        if FLAGS.generate_walk:
            var_list = [
                var for var in tf.global_variables()
                if 'embedding' not in var.name
            ]
        else:
            var_list = [var for var in tf.global_variables()]
        pretrained_saver = tf.train.Saver(var_list=var_list)
        checkpoint = tf.train.get_checkpoint_state(self.src_log)
        assert checkpoint, 'cannot get checkpoint: {}'.format(self.src_log)
        pretrained_saver.restore(self.sess, checkpoint.model_checkpoint_path)

    def _concat_and_save_imgs(self, src_imgs, dst_path):
        """Concatenate and save images

        Concatenate several images into a grid and save the result at dst_path.

        Args:
            src_imgs: Images to concatenate and save.
            dst_path: Destination path of image.
        """
        concated_img = concat_imgs(src_imgs, self.row_n, self.col_n)
        concated_img = (concated_img + 1.) * 127.5
        if self.img_dim == 1:
            concated_img = np.reshape(concated_img,
                                      (-1, self.col_n * self.img_height))
        else:
            concated_img = np.reshape(
                concated_img, (-1, self.col_n * self.img_height, self.img_dim))
        pil_img = Image.fromarray(np.uint8(concated_img))
        pil_img.save(dst_path)

    def generate(self, filename='generated'):
        """Generate fonts

        Generate fonts from JSON input.
        """
        generated_imgs = self.sess.run(self.generated_imgs,
                                       feed_dict={
                                           self.style_ids_x:
                                           self.style_gen_ids_x,
                                           self.style_ids_y:
                                           self.style_gen_ids_y,
                                           self.style_ids_alpha:
                                           self.style_gen_ids_alpha,
                                           self.char_ids_x:
                                           self.char_gen_ids_x,
                                           self.char_ids_y:
                                           self.char_gen_ids_y,
                                           self.char_ids_alpha:
                                           self.char_gen_ids_alpha
                                       })
        self._concat_and_save_imgs(
            generated_imgs,
            os.path.join(self.dst_generated, '{}.png'.format(filename)))

    def generate_random_walking(self):
        """Generate fonts with random walking

        Generate fonts by random-walking through the style embedding space,
        so the results change gradually from frame to frame.
        """
        for c in self.embedding_chars:
            dst_dir = os.path.join(self.dst_walk, c)
            if not os.path.exists(dst_dir):
                os.mkdir(dst_dir)
        batch_n = (self.char_embedding_n * FLAGS.char_img_n) // self.batch_size
        c_ids = self.real_dataset.get_ids_from_labels(self.embedding_chars)
        for batch_i in tqdm(range(batch_n)):
            style_id_start = batch_i
            if batch_i == batch_n - 1:
                style_id_end = 0
            else:
                style_id_end = batch_i + 1
            generated_imgs = self.sess.run(
                self.generated_imgs,
                feed_dict={
                    self.style_ids_x:
                    np.ones(self.batch_size) * style_id_start,
                    self.style_ids_y:
                    np.ones(self.batch_size) * style_id_end,
                    self.style_ids_alpha:
                    np.repeat(
                        np.linspace(0., 1., num=self.walk_step,
                                    endpoint=False), self.char_embedding_n),
                    self.char_ids_x:
                    np.tile(c_ids, self.batch_size // self.char_embedding_n),
                    self.char_ids_y:
                    np.tile(c_ids, self.batch_size // self.char_embedding_n),
                    self.char_ids_alpha:
                    np.zeros(self.batch_size)
                })
            for img_i in range(generated_imgs.shape[0]):
                img = generated_imgs[img_i]
                img = (img + 1.) * 127.5
                pil_img = Image.fromarray(np.uint8(img))
                pil_img.save(
                    os.path.join(
                        self.dst_walk,
                        str(self.embedding_chars[img_i %
                                                 self.char_embedding_n]),
                        '{:05d}.png'.format(
                            (batch_i * self.batch_size + img_i) //
                            self.char_embedding_n)))
        print('making gif animations...')
        for i in range(self.char_embedding_n):
            make_gif(
                os.path.join(self.dst_walk, self.embedding_chars[i]),
                os.path.join(self.dst_walk, self.embedding_chars[i] + '.gif'))
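# A minimal NumPy sketch of how the latent vector z above is assembled: two style
# embeddings are linearly interpolated with alpha, the character is one-hot encoded and
# interpolated the same way, and both parts are concatenated. All names and sizes here
# are illustrative assumptions, not values from the original snippet.
import numpy as np

style_z_size, char_n, batch_size = 100, 26, 4
rng = np.random.RandomState(0)
style_embedding = rng.uniform(-1, 1, (10, style_z_size)).astype(np.float32)

style_x, style_y = np.array([0, 0, 1, 1]), np.array([1, 1, 2, 2])   # ids to blend between
alpha = np.array([0.0, 0.5, 0.0, 0.5], dtype=np.float32)            # blending weights
char_ids = np.array([3, 3, 7, 7])                                   # which glyph to draw

style_z = (style_embedding[style_x] * (1. - alpha)[:, None]
           + style_embedding[style_y] * alpha[:, None])
char_z = np.eye(char_n, dtype=np.float32)[char_ids]
z = np.concatenate([style_z, char_z], axis=1)
print(z.shape)   # (batch_size, style_z_size + char_n)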
Beispiel #49
0
if not os.path.isdir('log'):
    os.mkdir('log')

D_step = 5
G_step = 1
LR = 2e-4
n_iter = 200000
print_every = 1000
batch_size = 64
noise_dim = 100

G_path = 'log/generator.pt'
D_path = 'log/discriminator.pt'

dataset = Dataset()

generator = Generator()
discriminator = Discriminator()
generator = generator.cuda()
discriminator = discriminator.cuda()

# generator.load_state_dict(torch.load(G_path))
# discriminator.load_state_dict(torch.load(D_path))

gen_optim = optim.Adam(generator.parameters(), lr=LR)
discrim_optim = optim.Adam(discriminator.parameters(), lr=LR)

criterion = nn.BCELoss()
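# Beispiel #49 stops before its training loop. The sketch below shows one common way the
# D_step/G_step/criterion hyperparameters above are used: an alternating update that trains
# the discriminator D_step times per G_step generator updates with nn.BCELoss. The tiny
# models and the random stand-in data are assumptions for illustration only.
import torch
import torch.nn as nn
import torch.optim as optim

noise_dim, data_dim, batch_size = 100, 784, 64
G = nn.Sequential(nn.Linear(noise_dim, data_dim), nn.Tanh())
D = nn.Sequential(nn.Linear(data_dim, 1), nn.Sigmoid())
g_opt = optim.Adam(G.parameters(), lr=2e-4)
d_opt = optim.Adam(D.parameters(), lr=2e-4)
bce = nn.BCELoss()
ones, zeros = torch.ones(batch_size, 1), torch.zeros(batch_size, 1)

def real_batch():
    # stand-in for sampling a real batch from the Dataset above
    return torch.rand(batch_size, data_dim)

for it in range(10):            # shortened demo loop; the snippet above uses n_iter = 200000
    for _ in range(5):          # D_step discriminator updates
        d_opt.zero_grad()
        fake = G(torch.randn(batch_size, noise_dim)).detach()
        d_loss = bce(D(real_batch()), ones) + bce(D(fake), zeros)
        d_loss.backward()
        d_opt.step()
    for _ in range(1):          # G_step generator updates
        g_opt.zero_grad()
        g_loss = bce(D(G(torch.randn(batch_size, noise_dim))), ones)
        g_loss.backward()
        g_opt.step()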

Beispiel #50
0
    return filtered_three_step_graph_paths



def extract_one_step_relpaths(sample, entity_2_samples):
    one_step_graph_paths = extract_one_step_graph_paths(sample, entity_2_samples)
    one_step_relpaths = set()
    for one_step_graph_path in one_step_graph_paths:
        path_head, path_relation, path_tail = one_step_graph_path
        one_step_relpaths.add(path_relation)
    return one_step_relpaths

def extract_two_step_relpaths(sample, entity_2_samples):
    two_step_graph_paths = extract_two_step_graph_paths(sample, entity_2_samples)
    two_step_relpaths = set()
    for two_step_graph_path in two_step_graph_paths:
        (e1, r1, e2a), (e2b, r2, e3) = two_step_graph_path
        two_step_relpaths.add(";".join([r1, r2]))
    return two_step_relpaths

def extract_three_step_relpaths(sample, entity_2_samples):
    three_step_graph_paths = extract_three_step_graph_paths(sample, entity_2_samples)
    three_step_relpaths = set()
    for three_step_graph_path in three_step_graph_paths:
        (e1, r1, e2a), (e2b, r2, e3a), (e3b, r3, e4) = three_step_graph_path
        three_step_relpaths.add(";".join([r1, r2, r3]))
    return three_step_relpaths


save(Dataset(FB15K))
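# A self-contained toy version of the one-step relation-path idea implemented above. It
# assumes entity_2_samples maps an entity to the (head, relation, tail) triples that mention
# it; that structure is inferred from the calls above and is not confirmed by the source.
def toy_one_step_relpaths(sample, entity_2_samples):
    head, _, tail = sample
    relpaths = set()
    for (h, r, t) in entity_2_samples.get(head, []):
        # keep relations that also connect head to tail, excluding the sample itself
        if (h, t) == (head, tail) and (h, r, t) != sample:
            relpaths.add(r)
    return relpaths

toy_graph = {
    'paris': [('paris', 'capital_of', 'france'), ('paris', 'located_in', 'france')],
}
print(toy_one_step_relpaths(('paris', 'capital_of', 'france'), toy_graph))  # {'located_in'}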
Beispiel #51
0
        m1 = event1_mask.unsqueeze(-1).expand_as(event1).float()
        m2 = event2_mask.unsqueeze(-1).expand_as(event2).float()

        event1 = event1 * m1
        event2 = event2 * m2

        opt1 = torch.sum(event1, dim=1)
        opt2 = torch.sum(event2, dim=1)

        opt = torch.cat((opt1, opt2), 1)
        opt = self.drop(opt)
        return opt



if __name__ == '__main__':
    model = BertCausalModel(3)
    with open('data.pickle', 'rb') as f:
        data = pickle.load(f)
    print(data[250:251][0][2])

    dataset = Dataset(10, data[250:251])

    for batch in dataset.reader('cpu', True):
        sentences_s, mask_s, sentences_t, mask_t, event1, event1_mask, event2, event2_mask, data_y = batch
        print(sentences_s, sentences_t, event1, event2, data_y)
        opt = model(sentences_s, mask_s, sentences_t, mask_t, event1, event1_mask, event2, event2_mask)
        print(opt)
        # print(a, b, c)
        break
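# A minimal sketch of the masked-sum pooling used in the forward pass above: the event mask
# zeroes out padded token embeddings before summing over the sequence dimension, leaving one
# vector per event span. Shapes are illustrative assumptions.
import torch

batch, seq_len, hidden = 2, 4, 8
event = torch.randn(batch, seq_len, hidden)               # token embeddings of an event span
event_mask = torch.tensor([[1, 1, 0, 0],
                           [1, 1, 1, 0]])                 # 1 = real token, 0 = padding
m = event_mask.unsqueeze(-1).expand_as(event).float()     # broadcast the mask over hidden dim
pooled = torch.sum(event * m, dim=1)                      # (batch, hidden) span representation
print(pooled.shape)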
Beispiel #52
0
def main():
    ##Defining the parser
    parser = argparse.ArgumentParser(description="Tensorflow Trainer")
    parser.add_argument("--resume", type=str, help="resume from checkpoint: ./path/model.ckpt")
    parser.add_argument("--start_iteration", default=0, type=int, help="starting iterations")
    parser.add_argument("--stop_iteration", default=1000, type=int, help="starting iterations")
    parser.add_argument("--epochs", default=100, type=int, help="total epochs")
    parser.add_argument("--gpu", default=0, type=int, help="GPU index")
    parser.add_argument("--arch", default="yae", type=str, help="architecture to use for training: yae, cae")
    parser.add_argument("--implicit_units", default=32, type=int, help="implicit units in the code")
    parser.add_argument("--wdecay", default=0.0, type=float, help="Define the weight decay")
    parser.add_argument("--lrate", default= 0.0001, type=float, help="Learning rate for Adam")
    parser.add_argument("--mini_batch", default=128, type=int, help="mini-batch size")
    parser.add_argument("--lambda_e", default=1.0, type=float, help="Explicit loss mixing coefficient")
    parser.add_argument("--lambda_i", default=1.0, type=float, help="Implicit loss mixing coefficient")
    parser.add_argument("--beta", default=1.0, type=float, help="beta hyperparameter used in beta-VAE")
    args = parser.parse_args()

    #Set the GPU 
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu)    
    import tensorflow as tf

    #Set global hyperparameters
    learning_rate = args.lrate
    mini_batch_size = args.mini_batch
    tot_epochs = args.epochs
    tot_labels = 10
    dataset_size = 60000
    tot_iterations = int((dataset_size / mini_batch_size) * tot_epochs)
    save_every_iteration = tot_iterations-1
    print_every_iteration = 25
    features_path = "./datasets/mnist/train/features.npy"
    labels_path = "./datasets/mnist/train/labels.npy"
    dataset_train = Dataset()
    dataset_train.load(features_path, labels_path, tot_labels, normalizer=255.0, shuffle=True, verbose=True)
    ##Set local hyperparameters
    if(args.arch=="yae"):
        simulation_path = "./results/yae" + "_ep" + str(args.epochs) +"_lambdae" + str(args.lambda_e) + "_lambdai" + str(args.lambda_i)
        from models.yae import Autoencoder
        my_net = Autoencoder(batch_size=mini_batch_size, channels=1, conv_filters=8, style_size=args.implicit_units, content_size=tot_labels, ksize=(3,3), start_iteration=args.start_iteration, dir_header=simulation_path)
    elif(args.arch=="cae"):
        simulation_path = "./results/cae" + "_ep" + str(args.epochs) + "_wdecay" + str(args.wdecay) + "_units" + str(args.implicit_units)
        from models.cae import Autoencoder
        my_net = Autoencoder(batch_size=mini_batch_size, channels=1, conv_filters=8, style_size=args.implicit_units, content_size=tot_labels, ksize=(3,3), start_iteration=args.start_iteration, dir_header=simulation_path)
    elif(args.arch=="cvae"):
        from models.cvae import Autoencoder
        simulation_path = "./results/cvae" + "_ep" + str(args.epochs) + "_wdecay" + str(args.wdecay) + "_units" + str(args.implicit_units) + "_beta" + str(args.beta) 
        my_net = Autoencoder(batch_size=mini_batch_size, channels=1, conv_filters=8, style_size=args.implicit_units, content_size=tot_labels, ksize=(3,3), start_iteration=args.start_iteration, dir_header=simulation_path, beta=args.beta)
    elif(args.arch=="aae"):
        from models.aae import Autoencoder
        simulation_path = "./results/aae" + "_ep" + str(args.epochs) + "_wdecay" + str(args.wdecay) + "_units" + str(args.implicit_units) 
        my_net = Autoencoder(batch_size=mini_batch_size, channels=1, conv_filters=4, style_size=args.implicit_units, content_size=tot_labels, ksize=(3,3), start_iteration=args.start_iteration, dir_header=simulation_path)
    elif(args.arch=="lenet"):
        simulation_path = "./results/lenet" + "_ep" + str(args.epochs) + "_wdecay" + str(args.wdecay) + "_lr" + str(args.lrate)
        from models.lenet import LeNet
        my_net = LeNet(batch_size=mini_batch_size, channels=1, conv_filters=8, tot_labels=10, ksize=(5,5), start_iteration=args.start_iteration, dir_header=simulation_path)    
    else:
        raise ValueError("[ERROR] The architecture '" + args.arch + "' does not exist!")

    #Init the session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    my_net.init_summary(sess)

    if args.resume:
        print("[INFO] Resuming from checkpoint: " + str(args.resume))
        my_net.load(sess, args.resume)
    else:                           
        sess.run(tf.global_variables_initializer()) #WARNING: do not call it when the load() method is used

    print("[INFO] Starting training...")
    for iteration in range(args.start_iteration, tot_iterations):
        if(args.arch=="yae"):
            input_features, input_labels = dataset_train.return_features_labels(mini_batch_size, onehot=False)
            local_loss = my_net.train(sess, input_features, input_labels, 
                                      learning_rate, args.lambda_e, args.lambda_i, iteration, print_every_iteration)
        elif(args.arch=="cae"):
            input_features, input_labels = dataset_train.return_features_labels(mini_batch_size, onehot=False)
            local_loss = my_net.train(sess, input_features, input_labels, 
                                       learning_rate, iteration, print_every_iteration)
        elif(args.arch=="cvae" or args.arch=="aae"):
            input_features, input_labels = dataset_train.return_features_labels(mini_batch_size, onehot=False)
            local_loss = my_net.train(sess, input_features, input_labels, 
                                       learning_rate, iteration, print_every_iteration)
        elif(args.arch=="lenet"):
            input_features, input_labels = dataset_train.return_features_labels(mini_batch_size, onehot=False)
            local_loss = my_net.train(sess, input_features, input_labels, 
                                       learning_rate, iteration, print_every_iteration) 
        else:
            raise ValueError("[ERROR] The architecture '" + args.arch + "' does not exist!")

        if(iteration % print_every_iteration == 0):
            print("Iteration: " + str(iteration) + "/" + str(tot_iterations) + " [" + str(round((iteration/float(tot_iterations))*100.0, 1)) + "%]")
            print("Loss: " + str(local_loss))
            print("")
        if(iteration % save_every_iteration == 0 and iteration!=0):
                my_net.save(sess, verbose=True)
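# A small worked example of the epoch-to-iteration arithmetic above, using the snippet's own
# defaults (60000 MNIST samples, mini-batch 128, 100 epochs).
dataset_size, mini_batch_size, tot_epochs = 60000, 128, 100
tot_iterations = int((dataset_size / mini_batch_size) * tot_epochs)
print(tot_iterations)   # 46875 training iterations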
Beispiel #53
0
from model import Model


def sample_z(m, n):
    return np.random.normal(loc=0., scale=1., size=[m, n])
    #return np.random.uniform(-5., 5., size=[m, n])


def check_path(path):
    if not os.path.exists(path):
        os.makedirs(path)


if __name__ == '__main__':

    dt = Dataset('../NIST_npy/', one_hot=True)
    mb_size = 100
    x_dim = dt.train_data.shape[1]
    y_dim = dt.train_label.shape[1]
    z_dim = 100
    iteration = 10000
    path = 'save/'
    check_path(path)

    model = Model(x_dim, z_dim, y_dim)
    record_D_loss = []
    record_G_loss = []
    with tf.Session() as sess:
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        sess.run(init)
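# sample_z above draws standard-normal latent noise and keeps a uniform alternative commented
# out; the short comparison below just shows the two sampling choices side by side. Sizes are
# illustrative only.
import numpy as np

z_normal = np.random.normal(loc=0., scale=1., size=[100, 100])    # unbounded, concentrated near 0
z_uniform = np.random.uniform(-5., 5., size=[100, 100])           # bounded, flat over [-5, 5)
print(z_normal.std(), z_uniform.min(), z_uniform.max())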
Beispiel #54
0
    def __init__(self, args):
        self.batch_size = args.batch_size
        self.no_cuda = args.no_cuda
        self.task = args.task

        # create exp directory
        file = [f for f in args.model_path.split('/')]
        if args.exp_name is not None:
            self.experiment_id = args.exp_name
        else:
            self.experiment_id = time.strftime('%m%d%H%M%S')
        cache_root = 'cache/%s' % self.experiment_id
        os.makedirs(cache_root, exist_ok=True)
        self.feature_dir = os.path.join(cache_root, 'features/')
        sys.stdout = Logger(os.path.join(cache_root, 'log.txt'))

        # check directory
        if not os.path.exists(self.feature_dir):
            os.makedirs(self.feature_dir)
        else:
            shutil.rmtree(self.feature_dir)
            os.makedirs(self.feature_dir)

        # print args
        print(str(args))

        # get gpu id
        gids = ''.join(args.gpu.split())
        self.gpu_ids = [int(gid) for gid in gids.split(',')]
        self.first_gpu = self.gpu_ids[0]

        # generate dataset
        self.infer_dataset_train = Dataset(
            root=args.dataset_root,
            dataset_name=args.dataset,
            split='train',
            num_points=args.num_points,
        )
        self.infer_dataset_test = Dataset(
            root=args.dataset_root,
            dataset_name=args.dataset,
            split='test',
            num_points=args.num_points,
        )
        self.infer_loader_train = torch.utils.data.DataLoader(
            self.infer_dataset_train,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers
        )
        self.infer_loader_test = torch.utils.data.DataLoader(
            self.infer_dataset_test,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers
        )
        print("Inference set size (train):", self.infer_loader_train.dataset.__len__())
        print("Inference set size (test):", self.infer_loader_test.dataset.__len__())

        # initialize model
        if args.task == "reconstruct":
            self.model = ReconstructionNet(args)
        elif args.task == "classify":
            self.model = ClassificationNet(args)
        if args.model_path != '':
            self._load_pretrain(args.model_path)

        # load model to gpu
        if not args.no_cuda:
            if len(self.gpu_ids) != 1:  # multiple gpus
                self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids)
            else:
                self.model = self.model.cuda(self.gpu_ids[0])
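# The constructor above redirects sys.stdout to a Logger that writes into cache_root. The
# Logger class itself is not shown, so the class below is only a guessed, minimal tee-style
# stand-in for it, not the project's actual implementation.
import sys

class TeeLogger:
    """Echo everything written to stdout into a log file as well."""
    def __init__(self, path):
        self.terminal = sys.stdout
        self.log = open(path, 'a')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()

# illustrative usage: sys.stdout = TeeLogger('cache/experiment/log.txt')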
Beispiel #55
0
        pyplot.imshow(generated_sdn, interpolation='none')
        pyplot.text(x, y, '(c) SiDN-GAN', size=12, ha='center')

        pyplot.subplot(1, 4, 4)
        pyplot.axis('off')
        pyplot.imshow(original, interpolation='none')
        pyplot.text(x, y, '(d) Ground Truth', size=12, ha='center')

        img_path = path + '/image-%04d.png' % (i + 1)
        pyplot.savefig(img_path)
        pyplot.close()


if __name__ == '__main__':

    dataset = Dataset(dataset='caltech256')
    dataset.split_test_data(test_sample=2000)
    noise_maker = NoiseMaker(shape=dataset.data_shape, noise_type='s&p')

    # dataset_name = 'cifar10-32x32'
    # dataset_name = 'caltech256-64x64'
    dataset_name = 'caltech256-128x128'

    model_folder = 'C:/PycharmProjects/NeuralNetworks-GAN/performance/'

    path = model_folder + dataset_name + '_evaluate'
    if not os.path.exists(path):
        os.makedirs(path)

    best_dn_gan_path = model_folder + dataset_name + '-dn_gan_2019-12-24' + '/epoch-0018/model_0018.h5'
    best_sdn_gan_path = model_folder + dataset_name + '-siamese_dn_gan_2019-12-21' + '/epoch-0019/model_0019.h5'
Beispiel #56
0
def main(args, configs):
    print("Prepare training ...")

    preprocess_config, model_config, train_config = configs

    # Get dataset
    dataset = Dataset(
        "train.txt", preprocess_config, train_config, sort=True, drop_last=True
    )
    batch_size = train_config["optimizer"]["batch_size"]
    group_size = 4  # Set this larger than 1 to enable sorting in Dataset
    assert batch_size * group_size < len(dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size * group_size,
        shuffle=True,
        collate_fn=dataset.collate_fn,
    )

    # Prepare model
    model, optimizer = get_model(args, configs, device, train=True)
    model = nn.DataParallel(model)
    num_param = get_param_num(model)
    Loss = FastSpeech2Loss(preprocess_config, model_config).to(device)
    print("Number of FastSpeech2 Parameters:", num_param)

    # Load vocoder
    vocoder = get_vocoder(model_config, device)

    # Init logger
    for p in train_config["path"].values():
        os.makedirs(p, exist_ok=True)
    train_log_path = os.path.join(train_config["path"]["log_path"], "train")
    val_log_path = os.path.join(train_config["path"]["log_path"], "val")
    os.makedirs(train_log_path, exist_ok=True)
    os.makedirs(val_log_path, exist_ok=True)
    train_logger = SummaryWriter(train_log_path)
    val_logger = SummaryWriter(val_log_path)

    # Training
    step = args.restore_step + 1
    epoch = 1
    grad_acc_step = train_config["optimizer"]["grad_acc_step"]
    grad_clip_thresh = train_config["optimizer"]["grad_clip_thresh"]
    total_step = train_config["step"]["total_step"]
    log_step = train_config["step"]["log_step"]
    save_step = train_config["step"]["save_step"]
    synth_step = train_config["step"]["synth_step"]
    val_step = train_config["step"]["val_step"]

    outer_bar = tqdm(total=total_step, desc="Training", position=0)
    outer_bar.n = args.restore_step
    outer_bar.update()

    while True:
        inner_bar = tqdm(total=len(loader), desc="Epoch {}".format(epoch), position=1)
        for batchs in loader:
            for batch in batchs:
                batch = to_device(batch, device)

                # Forward
                output = model(*(batch[2:]))

                # Cal Loss
                losses = Loss(batch, output)
                total_loss = losses[0]

                # Backward
                total_loss = total_loss / grad_acc_step
                total_loss.backward()
                if step % grad_acc_step == 0:
                    # Clipping gradients to avoid gradient explosion
                    nn.utils.clip_grad_norm_(model.parameters(), grad_clip_thresh)

                    # Update weights
                    optimizer.step_and_update_lr()
                    optimizer.zero_grad()

                if step % log_step == 0:
                    losses = [l.item() for l in losses]
                    message1 = "Step {}/{}, ".format(step, total_step)
                    message2 = "Total Loss: {:.4f}, Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Pitch Loss: {:.4f}, Energy Loss: {:.4f}, Duration Loss: {:.4f}".format(
                        *losses
                    )

                    with open(os.path.join(train_log_path, "log.txt"), "a") as f:
                        f.write(message1 + message2 + "\n")

                    outer_bar.write(message1 + message2)

                    log(train_logger, step, losses=losses)

                if step % synth_step == 0:
                    fig, wav_reconstruction, wav_prediction, tag = synth_one_sample(
                        batch,
                        output,
                        vocoder,
                        model_config,
                        preprocess_config,
                    )
                    log(
                        train_logger,
                        fig=fig,
                        tag="Training/step_{}_{}".format(step, tag),
                    )
                    sampling_rate = preprocess_config["preprocessing"]["audio"][
                        "sampling_rate"
                    ]
                    log(
                        train_logger,
                        audio=wav_reconstruction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_reconstructed".format(step, tag),
                    )
                    log(
                        train_logger,
                        audio=wav_prediction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_synthesized".format(step, tag),
                    )

                if step % val_step == 0:
                    model.eval()
                    message = evaluate(model, step, configs, val_logger, vocoder)
                    with open(os.path.join(val_log_path, "log.txt"), "a") as f:
                        f.write(message + "\n")
                    outer_bar.write(message)

                    model.train()

                if step % save_step == 0:
                    torch.save(
                        {
                            "model": model.module.state_dict(),
                            "optimizer": optimizer._optimizer.state_dict(),
                        },
                        os.path.join(
                            train_config["path"]["ckpt_path"],
                            "{}.pth.tar".format(step),
                        ),
                    )

                if step == total_step:
                    quit()
                step += 1
                outer_bar.update(1)

            inner_bar.update(1)
        epoch += 1
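# A compact, self-contained sketch of the gradient-accumulation pattern in the loop above:
# the loss is divided by grad_acc_step, gradients accumulate across backward() calls, and the
# optimizer only steps (after clipping) every grad_acc_step batches. The toy model, data and
# plain SGD optimizer are illustrative assumptions.
import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
grad_acc_step, grad_clip_thresh = 4, 1.0

for step in range(1, 13):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = nn.functional.mse_loss(model(x), y) / grad_acc_step   # scale so the sum averages out
    loss.backward()                                              # gradients accumulate here
    if step % grad_acc_step == 0:
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip_thresh)
        optimizer.step()
        optimizer.zero_grad()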
Beispiel #57
0
print('Loading data...')
if is_testing:
    if args.test_pc_in is not None:
        # single point cloud testing
        assert args.test_h5_out is not None
        net.simple_predict_and_save(sess,
                                    pc=np.genfromtxt(
                                        args.test_pc_in,
                                        delimiter=' ',
                                        dtype=float)[:, :3],
                                    pred_h5_file=args.test_h5_out)
    else:
        # batch testing
        test_data = Dataset(batch_size=batch_size,
                            n_max_instances=n_max_instances,
                            csv_path=conf.get_test_data_file(),
                            noisy=conf.is_test_data_noisy(),
                            fixed_order=True,
                            first_n=conf.get_test_data_first_n())
        net.predict_and_save(
            sess,
            dset=test_data,
            save_dir=conf.get_test_prediction_dir(),
        )
else:
    train_data = Dataset(batch_size=batch_size,
                         n_max_instances=n_max_instances,
                         csv_path=conf.get_train_data_file(),
                         noisy=conf.is_train_data_noisy(),
                         fixed_order=False,
                         first_n=conf.get_train_data_first_n())
    val_data = Dataset(batch_size=batch_size,
Beispiel #58
0
                        help="Config file",
                        default="cfg/architecture.cfg",
                        type=str)

    return parser.parse_args()


args = arg_parse()

#Set up the neural network
print("Preparing network .....")
network = Network(args.cfgfile)
network.compile()

print("Loading input .....")
dataset = Dataset()
x_train, y_train, x_test, y_test = dataset.loadData(
    network.net_info.input_shape)

# Encode the labels as one-hot vectors
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print("Training network .....")
network.fit(x_train, y_train, x_test, y_test)

print("evaluation: ")
network.evaluate(x_test, y_test)

x_predict, y_predict = dataset.predictData(network.net_info.input_shape)
predict_images, predict_labels = dataset.predictImages()
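# to_categorical above turns integer class labels into one-hot rows; a plain NumPy equivalent
# for reference, with made-up labels.
import numpy as np

labels = np.array([0, 2, 1, 2])
one_hot = np.eye(labels.max() + 1)[labels]
print(one_hot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]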
Beispiel #59
0
def main(is_interactive=True,
         k=64,
         des_option=constants.ORB_FEAT_OPTION,
         svm_kernel=cv2.ml.SVM_LINEAR):
    if not is_interactive:
        experiment_start = time.time()
    # Check for the dataset of images
    if not os.path.exists(constants.DATASET_PATH):
        print("Dataset not found, please copy one.")
        return
    dataset = Dataset(constants.DATASET_PATH)
    dataset.generate_sets()

    # Check for the directory where stores generated files
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)

    if is_interactive:
        des_option = input(
            "Enter [1] for using ORB features or [2] to use SIFT features.\n")
        # cast the interactive answers to int so the numeric comparisons below work in Python 3
        k = int(input(
            "Enter the number of cluster centers you want for the codebook.\n"))
        svm_option = int(input(
            "Enter [1] for using SVM kernel Linear or [2] to use RBF.\n"))
        svm_kernel = cv2.ml.SVM_LINEAR if svm_option == 1 else cv2.ml.SVM_RBF

    des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
    print(des_name)
    log = Log(k, des_name, svm_kernel)

    codebook_filename = filenames.codebook(k, des_name)
    print('codebook_filename')
    print(codebook_filename)
    start = time.time()
    end = time.time()
    log.train_des_time(end - start)
    start = time.time()
    end = time.time()
    log.codebook_time(end - start)
    # Train and test the dataset
    classifier = Classifier(dataset, log)
    svm, cluster_model = classifier.train(svm_kernel,
                                          k,
                                          des_name,
                                          des_option=des_option,
                                          is_interactive=is_interactive)
    print("Training ready. Now beginning with testing")
    result, labels = classifier.test(svm,
                                     cluster_model,
                                     k,
                                     des_option=des_option,
                                     is_interactive=is_interactive)
    print('test result')
    print(result, labels)
    # Store the results from the test
    classes = dataset.get_classes()
    log.classes(classes)
    log.classes_counts(dataset.get_classes_counts())
    result_filename = filenames.result(k, des_name, svm_kernel)
    test_count = len(dataset.get_test_set()[0])
    result_matrix = np.reshape(result, (len(classes), test_count))
    utils.save_csv(result_filename, result_matrix)

    # Create a confusion matrix
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32)
    for i in range(len(result)):
        predicted_id = int(result[i])
        real_id = int(labels[i])
        confusion_matrix[real_id][predicted_id] += 1

    print("Confusion Matrix =\n{0}".format(confusion_matrix))
    log.confusion_matrix(confusion_matrix)
    log.save()
    print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel)))
    if not is_interactive:
        experiment_end = time.time()
        elapsed_time = utils.humanize_time(experiment_end - experiment_start)
        print("Total time during the experiment was {0}".format(elapsed_time))
    else:
        # Show a plot of the confusion matrix on interactive mode
        utils.show_conf_mat(confusion_matrix)
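# The classifier above is trained against a k-means codebook of local descriptors. The sketch
# below shows how ORB descriptors are typically quantized into a k-bin bag-of-visual-words
# histogram with OpenCV and scikit-learn; it is a generic illustration and an assumption about
# what the (not shown) Classifier/codebook machinery does, not the project's actual code.
import numpy as np
import cv2
from sklearn.cluster import KMeans

def bovw_histogram(image_gray, kmeans, k):
    """Quantize an image's ORB descriptors against a fitted k-means codebook."""
    orb = cv2.ORB_create()
    _, descriptors = orb.detectAndCompute(image_gray, None)
    if descriptors is None:
        return np.zeros(k)
    words = kmeans.predict(descriptors.astype(np.float32))
    return np.bincount(words, minlength=k) / len(words)

# illustrative usage on random data; a real pipeline fits KMeans on training descriptors
k = 64
codebook = KMeans(n_clusters=k, n_init=10).fit(np.random.rand(1000, 32).astype(np.float32))
img = np.random.randint(0, 255, (128, 128), dtype=np.uint8)
print(bovw_histogram(img, codebook, k).shape)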
Beispiel #60
0
def main(args):
    device = torch.device('cuda' if args.gpu else 'cpu')

    # Load pretrained model and tokenizer
    config_cls, model_cls, tokenizer_cls = MODEL_CLASSES[args.model_type]
    config = config_cls.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=args.num_labels,
    )

    tokenizer = tokenizer_cls.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )

    model = model_cls.from_pretrained(
        args.model_name_or_path,
        config=config,
    )
    model.to(device)

    text_field = TextField(tokenizer)
    label_field = LabelField(
        torch.long if args.num_labels > 1 else torch.float)

    if args.do_test:
        fields = [('src', text_field), ('ref', text_field)]
    else:
        fields = [('src', text_field), ('ref', text_field),
                  ('score', label_field)]

    # Training
    if args.do_train:
        # setup dataset
        print('Loading training data ...')
        train_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        train_iter = Iterator(
            dataset=train_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        train(args, train_iter, model, device)
        model.save_pretrained(args.save_dir)
        tokenizer.save_pretrained(args.save_dir)

    # Evaluation
    if args.do_eval:
        model.eval()

        # setup dataset
        print('Loading development data ...')
        valid_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        valid_iter = Iterator(
            dataset=valid_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        preds_list = []
        refs_list = []

        for batch in tqdm(valid_iter, total=len(valid_iter)):
            input_ids = torch.cat([batch.src, batch.ref[:, 1:]],
                                  dim=1).to(device)
            labels = batch.score.squeeze(1).to(device)

            token_type_ids = [
                torch.zeros_like(batch.src),
                torch.ones_like(batch.ref[:, 1:])
            ]
            token_type_ids = torch.cat(token_type_ids, dim=1).to(device)
            outputs = model(input_ids,
                            token_type_ids=token_type_ids,
                            labels=labels)[1]

            if args.num_labels > 1:
                preds = torch.argmax(outputs, dim=1)
            else:
                preds = torch.ge(outputs, args.threshold).int().squeeze(1)

            preds_list.append(preds.to('cpu'))
            refs_list.append(labels.int().to('cpu'))

        preds_list = torch.cat(preds_list)
        refs_list = torch.cat(refs_list)

        avg = 'macro' if args.num_labels > 1 else 'micro'
        precision = precision_score(refs_list, preds_list, average=avg)
        recall = recall_score(refs_list, preds_list, average=avg)
        f1 = f1_score(refs_list, preds_list, average=avg)

        print(f"Presion: {precision * 100}", end='\t')
        print(f"Recall: {recall * 100}", end='\t')
        print(f"F1 score: {f1 * 100}")

    if args.do_test:
        model.eval()

        # setup dataset
        print('Loading test data ...')
        test_data = Dataset(
            path_to_file=args.data,
            fields=fields,
            filter_pred=lambda ex: args.src_min <= len(ex.src) <= args.src_max \
                and args.ref_min <= len(ex.ref) <= args.ref_max
        )

        test_iter = Iterator(
            dataset=test_data,
            batch_size=args.batch_size,
            shuffle=True,
            repeat=False,
        )

        for batch in tqdm(test_iter, total=len(test_iter)):
            input_ids = torch.cat([batch.src, batch.ref[:, 1:]],
                                  dim=1).to(device)

            token_type_ids = [
                torch.zeros_like(batch.src),
                torch.ones_like(batch.ref[:, 1:])
            ]
            token_type_ids = torch.cat(token_type_ids, dim=1).to(device)
            outputs = model(input_ids, token_type_ids=token_type_ids)[0]

            for src, ref, out in zip(batch.src, batch.ref, outputs):
                src = src[1:src.tolist().index(tokenizer.sep_token_id)]
                ref = ref[1:ref.tolist().index(tokenizer.sep_token_id)]
                src = tokenizer.decode(src)
                ref = tokenizer.decode(ref)
                if args.num_labels > 1:
                    out = torch.argmax(out)
                print(src + '\t' + ref + '\t' + str(out.item()))
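# The evaluation above uses macro averaging for the multi-class case and micro averaging for
# the thresholded binary case; a tiny scikit-learn example of the difference, with made-up
# predictions.
from sklearn.metrics import f1_score

refs = [0, 0, 1, 1, 2, 2]
preds = [0, 0, 1, 0, 2, 1]
print(f1_score(refs, preds, average='macro'))   # unweighted mean of per-class F1 scores
print(f1_score(refs, preds, average='micro'))   # F1 computed from global TP/FP/FN counts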