Example #1
def save_dictionary(worddict, wordcount, loc='./data/book_dictionary_large.pkl'):
    """
    Save a dictionary to the specified location
    """
    with open(loc, 'wb') as f:
        pkl.dump(worddict, f)
        pkl.dump(wordcount, f)
def main():
    """Generate intial word embedding for headlines and description."""
    headlines, desc = load_text()  # load headlines and descriptions
    vocab, vocab_count = build_vocab(headlines + desc)  # build vocabulary
    summarize_vocab(vocab, vocab_count)  # summarize vocabulary
    word2idx, idx2word = get_idx(vocab)  # add special tokens and get reverse vocab lookup
    glove_embedding_weights, glove_index_dict = get_glove()  # load GloVe data

    # initialize embedding
    embedding = initialize_embedding(vocab_size, embedding_dim, glove_embedding_weights)
    embedding = copy_glove_weights(embedding, idx2word, glove_embedding_weights, glove_index_dict)

    # map vocab to GloVe using cosine similarity
    glove_idx2idx = build_word_to_glove(embedding, word2idx, idx2word, glove_index_dict, glove_embedding_weights)

    # create a dense vector representation of headlines and descriptions
    description_vector = to_dense_vector(word2idx, desc, 'description')
    headline_vector = to_dense_vector(word2idx, headlines, 'headline')

    # write vocabulary to disk
    with open(path.join(config.path_data, '{}.pkl'.format(FN)), 'wb') as fp:
        pickle.dump((embedding, idx2word, word2idx, glove_idx2idx), fp, 2)

    # write data to disk
    with open(path.join(config.path_data, '{}.data.pkl'.format(FN)), 'wb') as fp:
        pickle.dump((description_vector, headline_vector), fp, 2)
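
A minimal counterpart sketch (not part of the original example): save_dictionary above writes two objects into one file, so they must be unpickled in the same order; the name load_dictionary is hypothetical and it assumes the same pkl alias.

def load_dictionary(loc='./data/book_dictionary_large.pkl'):
    """
    Load the word dictionary and word counts written by save_dictionary.
    """
    with open(loc, 'rb') as f:
        worddict = pkl.load(f)   # first object dumped
        wordcount = pkl.load(f)  # second object dumped
    return worddict, wordcount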
Example #3
    def save_results(self, output_name, keep_data=False):
        '''
        Save the results of the SCF to avoid re-computing.
        The pickled file will not include the data cube by default.

        Parameters
        ----------
        output_name : str
            Name of the outputted pickle file.
        keep_data : bool, optional
            Save the data cube in the pickle file when enabled.
        '''

        if not output_name.endswith(".pkl"):
            output_name += ".pkl"

        self_copy = deepcopy(self)

        # Don't keep the whole cube unless keep_data enabled.
        if not keep_data:
            self_copy._centroid = None
            self_copy._moment0 = None
            self_copy._linewidth = None

        with open(output_name, 'wb') as output:
            pickle.dump(self_copy, output, -1)
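
A short load-side sketch (hypothetical, not from the original class): the dump above uses protocol -1, i.e. the highest protocol available, which pickle.load handles automatically; when the file was written with keep_data=False, the _centroid, _moment0 and _linewidth attributes come back as None.

def load_results(pickle_file):
    # Unpickle a saved SCF object written by save_results.
    with open(pickle_file, 'rb') as f:
        return pickle.load(f)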
Example #4
def importAndSave(foldername, saveName):
	print('Importing, processing, and saving data for analysis...')
	data = TC.AllExperimentData(foldername)
	print(saveName)
	with open(TCG.DATA_SAVE_PATH + saveName, 'wb') as output:
		pickle.dump(data, output, -1)
	print('Done.')
	return data
def save_to_numpy(seriesuid, img, meta):
    file = '{}/{}'.format(PREPROCESS_PATH, seriesuid)

    with h5py.File(file + '.h5', 'w') as hf:
        hf.create_dataset('img', data=img)

    with open(file + '.meta', 'wb') as f:
        pickle.dump(meta, f)
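
A possible read-back counterpart (assuming the same PREPROCESS_PATH layout; the name load_from_numpy is made up for illustration): the image array lives in the HDF5 file and the metadata in the pickled side-car file.

def load_from_numpy(seriesuid):
    file = '{}/{}'.format(PREPROCESS_PATH, seriesuid)

    with h5py.File(file + '.h5', 'r') as hf:
        img = hf['img'][:]  # read the dataset back into a numpy array

    with open(file + '.meta', 'rb') as f:
        meta = pickle.load(f)

    return img, meta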
Example #6
 def save_binary(self, configuration):
     YLogger.info(self, "Saving binary brain to [%s]", configuration.binaries.binary_filename)
     start = datetime.datetime.now()
     bin_file = open(configuration.binaries.binary_filename, "wb")
     pickle.dump(self._aiml_parser, bin_file)
     bin_file.close()
     stop = datetime.datetime.now()
     diff = stop - start
     YLogger.info(self, "Brain save took a total of %.2f sec", diff.total_seconds())
def savePickle(data):
	# open the file, write the object, and let the with-block close it
	with open('data.pkl', 'wb') as pickl:
		pickle.dump(data, pickl)
Example #8
def sigmoid(x):
    with np.errstate(all='raise'):
        try:
            rv = 1.0 / (1.0 + np.exp(-x))
        except FloatingPointError:
            pickle.dump(model, open('save_err.p', 'wb'))
            logger.error('sigmoid(x): Floating point error because of %f',x)
            sys.exit(1)
    return rv
Example #9
def saveWTL(config, p1, p2, w, t, l):
    if w > 0 or t > 0 or l > 0:
        data = {
            "player1": p1,
            "player2": p2,
            "wins": w,
            "ties": t,
            "losses": l}
        pickle.dump(data, open(config.data.performance_location+"staged_"+str(time())+".pickle","wb"))
        sleep(0.05)
Example #10
def dsigmoid(x):
    with np.errstate(over='raise'):
        try:
            ex = np.exp(x)
            rv = ex / ((ex + 1.0)**2.0)
        except FloatingPointError:
            pickle.dump(model, open('save_err.p', 'wb'))
            logger.error('dsigmoid(x): Floating point error because of %f',x)
            sys.exit(1)
    return rv
Example #11
def cpdump(data, filename, outDir=None):
    # cPickle is exposed as _pickle in Python 3
    import _pickle as cPickle

    if outDir is not None:
        filename = outDir + filename
    with open(filename, "wb") as fp:
        cPickle.dump(data, fp)
Example #12
 def save(self, config):
     # Saving
     print("==> Saving models")
     os.system('mkdir -p ' + config.get('save'))
     filename = os.path.join(config.get('save'), config.get('name'))
     _pickle.dump({'config': self.config, 'record': self.record},
                   open(filename+'_main', 'wb'))
     _pickle.dump(self.model.getSequense(),
                  open(filename+'_sequence', 'wb'),
                  protocol=4)
     print("==> Saving done.")
Example #13
	def save(self,filename):

		"""
		Pickle the TransferFunction instance

		:param filename: name of the file to save the instance to
		:type filename: str.

		"""

		with open(filename,"wb") as fp:
			pkl.dump(self,fp,protocol=2)
Example #14
 def save_strategy_as(self):
     """
     Save the strategy to a pickle file so it can be imported in
     another copy of the GSF Parser.
     """
     file_name = filedialog.asksaveasfilename(
         filetypes=[("GSF Strategy", ".str")], defaultextension=".str",
         title="GSF Strategy Manager: Save a strategy")
     if file_name == "" or file_name is None:
         return
     strategy = self.list.db[self.list.selected_strategy]
     with open(file_name, "wb") as fo:
         pickle.dump(strategy, fo)
Example #15
 def play_games(self, env=None):
     if env is None:
         env = gym.make(self.gamename)
     
     game_results = []
     for _ in range(self.worker_replays):
         game_results.append(self.play_game(env))
         
     filename = 'tmp/'+ str(np.random.rand()) + '.pickle'
     f = open(filename, 'wb')
     cPickle.dump(game_results, f)
     f.close()
     return filename
Example #16
def train_model(input_to_softmax, 
                pickle_path,
                save_model_path,
                train_json='train_corpus.json',
                valid_json='valid_corpus.json',
                minibatch_size=20,
                spectrogram=True,
                mfcc_dim=13,
                optimizer=SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5),
                epochs=1,
                verbose=1,
                sort_by_duration=False,
                max_duration=10.0):
    
    # create a class instance for obtaining batches of data
    audio_gen = AudioGenerator(minibatch_size=minibatch_size, 
        spectrogram=spectrogram, mfcc_dim=mfcc_dim, max_duration=max_duration,
        sort_by_duration=sort_by_duration)
    # add the training data to the generator
    audio_gen.load_train_data(train_json)
    audio_gen.load_validation_data(valid_json)
    # calculate steps_per_epoch
    num_train_examples=len(audio_gen.train_audio_paths)
    steps_per_epoch = num_train_examples//minibatch_size
    # calculate validation_steps
    num_valid_samples = len(audio_gen.valid_audio_paths) 
    validation_steps = num_valid_samples//minibatch_size
    
    # add CTC loss to the NN specified in input_to_softmax
    model = add_ctc_loss(input_to_softmax)

    # CTC loss is implemented elsewhere, so use a dummy lambda function for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer)

    # make results/ directory, if necessary
    if not os.path.exists('results'):
        os.makedirs('results')

    # add checkpointer
    checkpointer = ModelCheckpoint(filepath='results/'+save_model_path, verbose=0)

    # train the model
    hist = model.fit_generator(generator=audio_gen.next_train(), steps_per_epoch=steps_per_epoch,
        epochs=epochs, validation_data=audio_gen.next_valid(), validation_steps=validation_steps,
        callbacks=[checkpointer], verbose=verbose)

    # save model loss
    with open('results/'+pickle_path, 'wb') as f:
        pickle.dump(hist.history, f)
def pickleIt(obj, save_path):
    '''
        function to pickle the given object.
        @param
        obj => the python object to be pickled
        save_path => the path where the pickled file is to be saved
        @return => nothing (the pickle file gets saved at the given location)
    '''
    if(not os.path.isfile(save_path)):
        with open(save_path, 'wb') as dumping:
            pickle.dump(obj, dumping)

        print("The file has been pickled at:", save_path)

    else:
        print("The pickle file already exists: ", save_path)
Example #18
def get_tasks(task_names, max_seq_len, load):
    '''
    Load tasks
    '''
    tasks = []
    for name in task_names:
        assert name in NAME2INFO, 'Task not found!'
        pkl_path = NAME2INFO[name][1] + "%s_task.pkl" % name
        if os.path.isfile(pkl_path) and load:
            task = pkl.load(open(pkl_path, 'rb'))
            log.info('\tLoaded existing task %s', name)
        else:
            task = NAME2INFO[name][0](NAME2INFO[name][1], max_seq_len, name)
            pkl.dump(task, open(pkl_path, 'wb'))
        tasks.append(task)
    log.info("\tFinished loading tasks: %s.", ' '.join([task.name for task in tasks]))
    return tasks
Example #19
def do_python_eval(devkit_path, year, image_set, classes, output_dir = 'results'):
    annopath = os.path.join(
        devkit_path,
        'VOC' + year,
        'Annotations',
        '{}.xml')
    imagesetfile = os.path.join(
        devkit_path,
        'VOC' + year,
        'ImageSets',
        'Main',
        image_set + '.txt')
    cachedir = os.path.join(devkit_path, 'annotations_cache')
    aps = []
    # The PASCAL VOC metric changed in 2010
    use_07_metric = True if int(year) < 2010 else False
    print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    print('devkit_path=',devkit_path,', year = ',year)

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    for i, cls in enumerate(classes):
        if cls == '__background__':
            continue
        filename = get_voc_results_file_template(image_set).format(cls)
        rec, prec, ap = voc_eval(
            filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps += [ap]
        print('AP for {} = {:.4f}'.format(cls, ap))
        with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
            cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
    print('Mean AP = {:.4f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('Results:')
    for ap in aps:
        print('{:.3f}'.format(ap))
    print('{:.3f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('')
    print('--------------------------------------------------------------')
    print('Results computed with the **unofficial** Python eval code.')
    print('Results should be very close to the official MATLAB eval code.')
    print('-- Thanks, The Management')
    print('--------------------------------------------------------------')
Example #20
  def _resize32x32(self, full_filepath):
    def _resize(data_in):
      num_samples = data_in.shape[0]
      tmp_data_out = np.zeros((num_samples, 1, 32, 32))
      for i in range(0, num_samples):
        tmp_img = data_in[i, :].reshape(28, 28)
        new_img = cv2.resize(tmp_img, dsize=(32, 32), interpolation=cv2.INTER_NEAREST)
        tmp_data_out[i, 0, :, :] = new_img
      return tmp_data_out

    f = gzip.open(full_filepath, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    with gzip.open(full_filepath, 'wb') as handle:
      cPickle.dump(([_resize(train_set[0]), train_set[1]],
                    [_resize(valid_set[0]), valid_set[1]],
                    [_resize(test_set[0]), test_set[1]]),
                   handle)
def pickle_save(filename,model,y_pred,y_valid):
    ### SAVE Processed Data
    pickle_file = '%s/%s'%(Dir,filename)
    try:
      f = open(pickle_file, 'wb')
      save = {
        'model': model,
        'y_pred': y_pred,
        'y_valid': y_valid,
        }
      #pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
      pickle.dump(save, f)
      f.close()
    except Exception as e:
      print('Unable to save data to', pickle_file, ':', e)
      raise
    ### SAVED
    return
Example #22
    def __init__(self, config_file_path, session_file_path, repo=None):
        self._gerrit_url = None
        self._jira = None
        self._repo = repo

        if os.path.isfile(session_file_path):
            with open(session_file_path, 'rb') as session_file:
                session = pickle.load(session_file)
        else:
            session = None

        if config_file_path is not None:
            self._read_config(config_file_path, session)

        if session is not None:
            session.max_retries = 3
            self._jira._session = session
        else:
            with open(session_file_path, 'wb') as session_file:
                pickle.dump(self._jira._session, session_file)
Example #23
def transform_text_2_sentences(train, test, save_train='../output/train_text.p', save_test='../output/test_text.p'):
    '''
    Transform raw text into sentences; if @save_train or @save_test is not None, saves
    pickles for further use
    '''
    train_text = []
    test_text = []
    for each in train['Combined']:
        train_text.append(text_process(each))
    
    for each in test['Combined']:
        test_text.append(text_process(each))
        
    if(save_train != None):
        cPickle.dump(train_text, open(save_train, 'wb'))
        
    if(save_test != None):
        cPickle.dump(test_text, open(save_test, 'wb'))
        
    return train_text, test_text
Example #24
  def write_to_file(self, save_file):
    "Write all the times to file."
    try:
      with TestTimes.LockedFile(save_file, 'a+b') as fd:
        times = TestTimes.__read_test_times_file(fd)

        if times is None:
          times = self.__times
        else:
          times.update(self.__times)

        # We erase data from file while still holding a lock to it. This
        # way reading old test times and appending new ones are atomic
        # for external viewer.
        fd.seek(0)
        fd.truncate()
        with gzip.GzipFile(fileobj=fd, mode='wb') as gzf:
          cPickle.dump(times, gzf, PICKLE_HIGHEST_PROTOCOL)
    except IOError:
      pass  # ignore errors---saving the times isn't that important
Example #25
  def preprocess(self, input_file, vocab_file, tensor_file):
    with codecs.open(input_file, "r", encoding=self.encoding) as f:
      train_data = f.read()
      train_data = normalize_unicodes(train_data)

    counter = collections.Counter(train_data)
    count_pairs = sorted(counter.items(), key=lambda x: -x[1])
    threshold = 10
    self.chars, counts = zip(*count_pairs)
    self.chars = START_VOCAB + [c for i, c in enumerate(self.chars) if c not in START_VOCAB and counts[i] > threshold]
    self.vocab_size = len(self.chars)
    self.vocab = dict(zip(self.chars, range(len(self.chars))))
    with open(vocab_file, 'wb') as f:
      cPickle.dump(self.chars, f)
    unk_index = START_VOCAB.index(UNK)
    self.tensor = np.array([self.vocab.get(c, unk_index) for c in train_data], dtype=np.int64)
    train_size = int(self.tensor.shape[0] * 0.9)
    self.valid = self.tensor[train_size:]
    self.train = self.tensor[:train_size]
    np.save(tensor_file, self.tensor)
Example #26
    def save(self, fpath='.', fname=None):
        """ Save a pickled representation of Model state. """
        fpathstart, fpathext = os.path.splitext(fpath)
        if fpathext == '.pkl':
            # User supplied an absolute path to a pickle file
            fpath, fname = os.path.split(fpath)

        elif fname is None:
            # Generate filename based on date
            date_obj = datetime.datetime.now()
            date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
            class_name = self.__class__.__name__
            fname = '%s.%s.pkl' % (class_name, date_str)

        fabspath = os.path.join(fpath, fname)

        logger.info("Saving to %s ..." % fabspath)
        file = open(fabspath, 'wb')
        state = self.__getstate__()
        pickle.dump(state, file, protocol=pickle.HIGHEST_PROTOCOL)
        file.close()
def get_meta_dict():
    cache_file = '{}/all_meta_cache.meta'.format(PREPROCESS_PATH)
    if os.path.exists(cache_file):
        print('get meta_dict from cache')
        with open(cache_file, 'rb') as f:
            return pickle.load(f)

    meta_dict = {}
    for f in glob('{}/*.meta'.format(PREPROCESS_PATH)):
        seriesuid = f[-15:-5]
        if not os.path.exists('{}/{}.h5'.format(PREPROCESS_PATH, seriesuid)):
            continue

        with open(f, 'rb') as f:
            meta = pickle.load(f)
            meta_dict[meta['seriesuid']] = meta

    # cache it
    with open(cache_file, 'wb') as f:
        pickle.dump(meta_dict, f)

    return meta_dict
Example #28
def train(ai, config):
    loaded_files = []
    x = config.iterations
    i = len(glob.glob(config.data.model_location+"*.h5"))
    loaded_files, _ = load_games(ai, loaded_files, config)
    while(x != 0):
        if i > config.iter3:
            ai.update_lr(config.learning_rate3)
        elif i > config.iter2:
            ai.update_lr(config.learning_rate2)
        else:
            ai.update_lr(config.learning_rate1)
        loaded_files, diff = load_games(ai, loaded_files, config)
        total_diff = diff
        start = time()
        print("Iteration %04d"%i)
        end = config.min_new_game_files if i> 0 else config.min_game_file
        util.print_progress_bar(0, end, start=start)
        while(total_diff < end):
            if diff > 0:
                total_diff += diff
                util.print_progress_bar(total_diff, end, start=start)
            sleep(5)
            loaded_files, diff = load_games(ai, loaded_files, config)
        util.print_progress_bar(end, end, start=start)
        print("Training for %d batches on %d samples" % (config.batches_per_iter, len(ai.buffer.buffer)))
        start = time()
        history = ai.train_batches(config.batch_size, config.batches_per_iter, config.verbose)
        for val in history.history.keys():
            print("%s: %0.4f" % (val, history.history[val][-1]))
        if i % config.save_model_cycles == 0:
            ai.save("%smodel_%04d.h5" % (config.data.model_location, i))
			
        file = open("%shist_%04d.pickle" % (config.data.history_location, i), 'wb') 
        pickle.dump(pickle.dumps(history.history), file)
        file.close() 
        print("Iteration Time: %0.2f" % (time()-start))
        x -= 1
        i += 1
Example #29
	def save(self,filename,format=None,**kwargs):
		
		"""
		Save ensemble data in an external file (in arbitrary format)

		:param filename: file name of the external file
		:type filename: str.

		:param format: format in which to save the ensemble; if None the format is auto detected from the filename
		:type format: str. or callable

		:param kwargs: the keyword arguments are passed to the saver (or to format if callable)
		:type kwargs: dict.

		"""

		#Auto detect format
		if format is None:
			if filename.endswith(".npy"):
				format = "numpy"
			elif filename.endswith(".mat"):
				format = "matlab"
			elif filename.endswith(".pkl"):
				format = "pickle"
			else:
				raise ValueError("Format not recognized!")


		#Proceed to the saving procedure
		if format=="numpy":
			np.save(filename,self.values)
		elif format=="matlab":
			sio.savemat(filename,{"values": self.values},**kwargs)
		elif format=="pickle":
			with open(filename,"wb") as fp:
				pickle.dump(self,fp)
		else:
			format(self,filename,**kwargs)
Example #30
 def write_uncorrected_flux(self, uncorrected_flux):
     """writes each power slice to the file output.csv. 3d->file"""
     # There might be an error here in the cube writing to file, y instead of z split.
     # screw it, 4d array to 3d file doesn't work well.
     st=""
     u_flux=uncorrected_flux
     
     # voxel dimensions
     zlen=len(u_flux)
     ylen=len(u_flux[0])
     xlen=len(u_flux[0][0])
     
     for k in range(0, zlen):
         s=""
         power_temp=cell2(xlen, ylen)        
         for i in range(0, ylen):
             for j in range(0, xlen):
                 # flipdim unnecessary
                 power_temp[i][j]=u_flux[k][i][j]
         
         
         power_temp_s=[list(map(str, i)) for i in power_temp]
         
         s+="\n".join([",".join(i) for i in power_temp_s])
         
         
         if k==0:
             st+=s
         else:
             st+="\n\n"+s
   
     f=open("raw_flux.csv", "w")
     f.write(st)
     f.close()
     
     f2=open("pickle\\raw_flux.pkl", "wb")
     pickle.dump(u_flux, f2)
     f2.close()
Example #31
        output_ = output[-1].data
        targets_ = targets[-1].data
        targets_ = targets_.unsqueeze(dim=1)
        if len(output_.shape) < 3: output_ = output_.unsqueeze(dim=0)  # unsqueeze is not in-place

        CELoss = torch.gather(output_, dim=1, index=targets_).squeeze()
        CELoss = -1 * CELoss
        save_all_losses += CELoss.tolist()
        loss = torch.sum(CELoss)

        total_loss += loss
        n += targets_.shape[0]

        if (n % 20000) == 0: print('Processed %d examples' % n)

        del output, targets, hidden

    print('Last word: %s' % (corpus.dictionary.idx2word[data.data[-1, -1]]))
    print('Total examples processed:', n)
    return total_loss / n, save_all_losses


loss, all_losses = evaluate(data_, args)
res = [args.seq_len, loss, math.exp(loss)]
print(res)

with open(os.path.join(args.logdir, 'per_token_scores_' + str(args.seq_len)),
          'wb') as f:
    pickle.dump(args.seq_len, f)
    pickle.dump(all_losses, f)
Example #32
def main():
    train_pos, train_neg, test_pos, test_neg = load_data(path_to_data)

    # Using saved models and vectors for method == 'nlp'. (Original runtime = 5 mins; Current runtime = 10 seconds)
    if method == "nlp":
        train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_NLP(
            train_pos, train_neg, test_pos, test_neg)
        filename = './' + path_to_data + 'train_pos_vec_nlp.txt'
        pickle.dump(train_pos_vec, open(filename, 'wb'))
        train_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'train_neg_vec_nlp.txt'
        pickle.dump(train_neg_vec, open(filename, 'wb'))
        train_neg_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_pos_vec_nlp.txt'
        pickle.dump(test_pos_vec, open(filename, 'wb'))
        test_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_neg_vec_nlp.txt'
        pickle.dump(test_neg_vec, open(filename, 'wb'))
        test_neg_vec = pickle.load(open(filename, 'rb'))

        nb_model, lr_model = build_models_NLP(train_pos_vec, train_neg_vec)
        filename = './' + path_to_data + 'nb_model_nlp.sav'
        pickle.dump(nb_model, open(filename, 'wb'))
        nb_model = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'lr_model_nlp.sav'
        pickle.dump(lr_model, open(filename, 'wb'))
        lr_model = pickle.load(open(filename, 'rb'))

    # Using saved models and vectors for method == 'd2v'. (Original runtime = 10 mins; Current runtime = 10 seconds)
    if method == "d2v":
        train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_DOC(
            train_pos, train_neg, test_pos, test_neg)
        filename = './' + path_to_data + 'train_pos_vec_d2v.txt'
        pickle.dump(train_pos_vec, open(filename, 'wb'))
        train_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'train_neg_vec_d2v.txt'
        pickle.dump(train_neg_vec, open(filename, 'wb'))
        train_neg_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_pos_vec_d2v.txt'
        pickle.dump(test_pos_vec, open(filename, 'wb'))
        test_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_neg_vec_d2v.txt'
        pickle.dump(test_neg_vec, open(filename, 'wb'))
        test_neg_vec = pickle.load(open(filename, 'rb'))

        nb_model, lr_model = build_models_DOC(train_pos_vec, train_neg_vec)
        filename = './' + path_to_data + 'nb_model_d2v.sav'
        pickle.dump(nb_model, open(filename, 'wb'))
        nb_model = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'lr_model_d2v.sav'
        pickle.dump(lr_model, open(filename, 'wb'))
        lr_model = pickle.load(open(filename, 'rb'))

    if method == "w2v":
        train_pos_vec, train_neg_vec, test_pos_vec, test_neg_vec = feature_vecs_DOC_W2V(
            train_pos, train_neg, test_pos, test_neg)
        filename = './' + path_to_data + 'train_pos_vec_w2v.txt'
        pickle.dump(train_pos_vec, open(filename, 'wb'))
        #train_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'train_neg_vec_w2v.txt'
        pickle.dump(train_neg_vec, open(filename, 'wb'))
        #train_neg_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_pos_vec_w2v.txt'
        pickle.dump(test_pos_vec, open(filename, 'wb'))
        #test_pos_vec = pickle.load(open(filename, 'rb'))
        filename = './' + path_to_data + 'test_neg_vec_w2v.txt'
        pickle.dump(test_neg_vec, open(filename, 'wb'))
        #test_neg_vec = pickle.load(open(filename, 'rb'))

        nb_model, lr_model = build_models_DOC_W2V(train_pos_vec, train_neg_vec)
        filename = './' + path_to_data + 'nb_model_w2v.sav'
        pickle.dump(nb_model, open(filename, 'wb'))
        filename = './' + path_to_data + 'lr_model_w2v.sav'
        pickle.dump(lr_model, open(filename, 'wb'))

    print("Naive Bayes")
    print("-----------")
    evaluate_model(nb_model, test_pos_vec, test_neg_vec, True)

    print("")
    print("Logistic Regression")
    print("-------------------")
    evaluate_model(lr_model, test_pos_vec, test_neg_vec, True)
Example #33
    def generate_dic(self):

        current_dict = {}
        doc_length = []
        #         c2 = 0
        ''' parse the documents'''
        for j in range(1, 21579):
            filename_txt = 'tokenization\\%d.txt' % j
            f = open(filename_txt, 'r')
            ''' strip document into token list'''
            data = f.read().split()
            token_stream = data
            m_length = len(token_stream)
            doc_length.append([j, m_length])
            i = 0
            print('read doc %d' % j)

            while (i < len(token_stream)):

                while (SPIMI.MemorySize * 1024 * 1024 -
                       self.current_used_memory_size
                       ) > 0 and i < len(token_stream):

                    self.current_used_memory_size = 0

                    if not token_stream[i] in current_dict:
                        current_dict[token_stream[i]] = []
#                         c1 = c1 + 1

#                     print ('current dic size is %d'%(sys.getsizeof(current_dict)))
#                     print ('current dic size of mb is %f'%(sys.getsizeof(current_dict)/1024.0/1024.0))
# #

                    # check if the current term is already in the same doc in order to compute the term frequency
                    # current posting list is empty
                    if current_dict[token_stream[i]] == []:
                        current_dict[token_stream[i]].append([j, 1])
                        self.c = self.c + 1
                    elif current_dict[token_stream[i]][-1][0] == j:
                        current_dict[token_stream[i]][-1][1] += 1
                        self.c = self.c + 1
                    else:
                        current_dict[token_stream[i]].append([j, 1])
                        self.c = self.c + 1

                    self.current_used_memory_size = sys.getsizeof(current_dict)
                    i = i + 1
                ''' memory is over '''
                if SPIMI.MemorySize * 1024 * 1024 - self.current_used_memory_size <= 0:
                    self.num = self.num + 1
                    print(' memory done')
                    print(current_dict)
                    '''sort '''
                    key_list = sorted(current_dict.keys())
                    print('new dic %d' % (self.num))
                    print(key_list)
                    '''write dictionary into disk'''
                    filename = 'dic\\dic%d.txt' % (self.num)
                    f = open(filename, "wb")
                    cPickle.dump(current_dict, f)
                    f.close()
                    '''write key list into disk'''
                    filename = 'dic\\keylist%d.txt' % (self.num)
                    f = open(filename, "wb")
                    cPickle.dump(key_list, f)
                    f.close()

                    self.current_used_memory_size = 0
                    current_dict = {}

                elif j == 21578:
                    self.num = self.num + 1
                    print(' last one ')
                    print(current_dict)
                    '''sort '''
                    key_list = sorted(current_dict.keys())
                    print('new dic %d' % (self.num))
                    print(key_list)
                    '''write dictionary into disk'''
                    filename = 'dic\\dic%d.txt' % (self.num)
                    f = open(filename, "wb")
                    cPickle.dump(current_dict, f)
                    f.close()
                    '''write key list into disk'''
                    filename = 'dic\\keylist%d.txt' % (self.num)
                    f = open(filename, "wb")
                    cPickle.dump(key_list, f)
                    f.close()
                    break
        '''write doc length into disk'''
        filename = 'dic\\doc.txt'
        f = open(filename, "wb")
        cPickle.dump(doc_length, f)
        f.close()
        print('out of loop')
Example #34
def compressPickle(fName, data):
    with bz2.BZ2File(fName + '.pbz2', 'w') as f: 
        cPickle.dump(data, f)
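
The matching decompression step would look roughly like this (decompressPickle is a hypothetical name; it assumes the same _pickle-as-cPickle import used above):

def decompressPickle(fName):
    # bz2.BZ2File gives a file-like object that cPickle.load can read directly
    with bz2.BZ2File(fName + '.pbz2', 'rb') as f:
        return cPickle.load(f)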
Example #35
    xs,hs,dlogps,drs = [],[],[],[] # reset array memory

    # compute the discounted reward backwards through time
    discounted_epr = discount_rewards(epr)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_epr -= np.mean(discounted_epr)
    discounted_epr /= np.std(discounted_epr)

    epdlogp *= discounted_epr # modulate the gradient with advantage (PG magic happens right here.)
    grad = policy_backward(eph, epdlogp)
    for k in model: grad_buffer[k] += grad[k] # accumulate grad over batch

    # perform rmsprop parameter update every batch_size episodes
    if episode_number % batch_size == 0:
      for k,v in model.items():
        g = grad_buffer[k] # gradient
        rmsprop_cache[k] = decay_rate * rmsprop_cache[k] + (1 - decay_rate) * g**2
        model[k] += learning_rate * g / (np.sqrt(rmsprop_cache[k]) + 1e-5)
        grad_buffer[k] = np.zeros_like(v) # reset batch gradient buffer

    # boring book-keeping
    running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
    print('resetting env. episode reward total was %f. running mean: %f' % (reward_sum, running_reward))
    if episode_number % 10 == 0: cPickle.dump(model, open('save_space.p', 'wb'))
    reward_sum = 0
    observation = env.reset() # reset env
    prev_x = None

  if reward != 0: # Pong has either +1 or -1 reward exactly when game ends.
    print('ep %d: game finished, reward: %f' % (episode_number, reward) + ('' if reward == -1 else ' !!!!!!!!'))
Example #36
# gather the information into a list of tuple
RNA_SS_data = collections.namedtuple('RNA_SS_data',
                                     'seq ss_label length name pairs')
RNA_SS_data_list = list()
for i in range(len(data_list)):
    RNA_SS_data_list.append(
        RNA_SS_data(seq=seq_encoding_list_padded[i],
                    ss_label=stru_encoding_list_padded[i],
                    length=seq_len_list[i],
                    name=file_list[i],
                    pairs=pairs_list[i]))

## training test split
RNA_SS_train, RNA_SS_test = train_test_split(RNA_SS_data_list,
                                             test_size=0.2,
                                             random_state=seed)

RNA_SS_test, RNA_SS_val = train_test_split(RNA_SS_test,
                                           test_size=0.5,
                                           random_state=seed)

# savepath = dataset+"_"+"_".join(rna_types)
savepath = dataset + '_all_{}'.format(length_limit)
# savepath = dataset+'_all'
os.mkdir(savepath)

for i in ['train', 'test', 'val']:
    with open(savepath + '/%s.pickle' % i, 'wb') as f:
        cPickle.dump(eval('RNA_SS_' + i), f)
Example #37
def train_model(input_to_softmax,
                pickle_path,
                save_model_path,
                train_json='train_corpus.json',
                valid_json='valid_corpus.json',
                minibatch_size=100,
                spectrogram=True,
                mfcc_dim=13,
                optimizer=SGD(lr=0.01,
                              decay=1e-6,
                              momentum=0.9,
                              nesterov=True,
                              clipnorm=5),
                epochs=20,
                verbose=1,
                sort_by_duration=False,
                max_duration=10.0):

    # create a class instance for obtaining batches of data
    audio_gen = AudioGenerator(minibatch_size=minibatch_size,
                               spectrogram=spectrogram,
                               mfcc_dim=mfcc_dim,
                               max_duration=max_duration,
                               sort_by_duration=sort_by_duration)
    # add the training data to the generator
    audio_gen.load_train_data(train_json)
    audio_gen.load_validation_data(valid_json)
    # calculate steps_per_epoch
    num_train_examples = len(audio_gen.train_audio_paths)
    steps_per_epoch = num_train_examples // minibatch_size
    # calculate validation_steps
    num_valid_samples = len(audio_gen.valid_audio_paths)
    validation_steps = num_valid_samples // minibatch_size

    # add CTC loss to the NN specified in input_to_softmax
    model = add_ctc_loss(input_to_softmax)

    # CTC loss is implemented elsewhere, so use a dummy lambda function for the loss
    model.compile(loss={
        'ctc': lambda y_true, y_pred: y_pred
    },
                  optimizer=optimizer)

    # make results/ directory, if necessary
    if not os.path.exists('results'):
        os.makedirs('results')

    # add checkpointer
    checkpointer = ModelCheckpoint(filepath='results/' + save_model_path,
                                   verbose=0)

    # train the model
    hist = model.fit_generator(generator=audio_gen.next_train(),
                               steps_per_epoch=steps_per_epoch,
                               epochs=epochs,
                               validation_data=audio_gen.next_valid(),
                               validation_steps=validation_steps,
                               callbacks=[checkpointer],
                               verbose=verbose)

    # save model loss
    with open('results/' + pickle_path, 'wb') as f:
        pickle.dump(hist.history, f)
Example #38
sys.path.append("./data/")
from process_documets import load_dataset
from utills import *

LOAD_DATA_FROM_SCRATCH = True
print("Making model")
model = get_model()
print("Done Making model")

if LOAD_DATA_FROM_SCRATCH:
    X, Y = load_dataset()
    X, Y = prepare_loaded_dataset_for_training(X, Y, ONE_SIDE_CONTEXT_SIZE)

    with open("X.pkl", "wb", encoding='utf-8') as f:
        cPickle.dump(X, f)
    with open("Y.pkl", "wb", encoding='utf-8') as f:
        cPickle.dump(Y, f)
else:
    with open("X.pkl", "r", encoding='utf-8') as f:
        X = cPickle.load(f)
    with open("Y.pkl", "r", encoding='utf-8') as f:
        Y = cPickle.load(f)

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy', 'sparse_categorical_accuracy'])

checkpoints = ModelCheckpoint('trained_model.{epoch:02d}-{val_loss:.3f}.hdf5',
                              monitor='acc',
                              verbose=1,
Example #39
    def save_model(self, sess, path):
        save_target = path + '_iter%d' % self.epochs_trained
        dirname = os.path.dirname(save_target)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        self.saver.save(sess, save_target)

        dictionaries_path = save_target + '.dict'
        with open(dictionaries_path, 'wb') as file:
            pickle.dump(self.subtoken_to_index, file)
            pickle.dump(self.index_to_subtoken, file)
            pickle.dump(self.subtoken_vocab_size, file)

            pickle.dump(self.target_to_index, file)
            pickle.dump(self.index_to_target, file)
            pickle.dump(self.target_vocab_size, file)

            pickle.dump(self.node_to_index, file)
            pickle.dump(self.index_to_node, file)
            pickle.dump(self.nodes_vocab_size, file)

            pickle.dump(self.num_training_examples, file)
            pickle.dump(self.epochs_trained, file)
            pickle.dump(self.config, file)
        print('Saved after %d epochs in: %s' %
              (self.epochs_trained, save_target))
Example #40
def test_from_rgb_detection(output_filename, result_dir=None):
    ''' Test frustum pointnets with 2D boxes from a RGB detector.
    Write test results to KITTI format label files.
    todo (rqi): support variable number of points.
    '''
    ps_list = []
    segp_list = []
    center_list = []
    heading_cls_list = []
    heading_res_list = []
    size_cls_list = []
    size_res_list = []
    rot_angle_list = []
    score_list = []
    onehot_list = []

    test_idxs = np.arange(0, len(TEST_DATASET))
    print(len(TEST_DATASET))
    batch_size = BATCH_SIZE
    num_batches = int((len(TEST_DATASET) + batch_size - 1) / batch_size)

    batch_data_to_feed = np.zeros((batch_size, NUM_POINT, NUM_CHANNEL))
    batch_one_hot_to_feed = np.zeros((batch_size, 3))
    sess, ops = get_session_and_ops(batch_size=batch_size, num_point=NUM_POINT)
    for batch_idx in range(num_batches):
        print('batch idx: %d' % (batch_idx))
        start_idx = batch_idx * batch_size
        end_idx = min(len(TEST_DATASET), (batch_idx + 1) * batch_size)
        cur_batch_size = end_idx - start_idx

        batch_data, batch_rot_angle, batch_rgb_prob, batch_one_hot_vec = \
            get_batch(TEST_DATASET, test_idxs, start_idx, end_idx,
                NUM_POINT, NUM_CHANNEL, from_rgb_detection=True)
        batch_data_to_feed[0:cur_batch_size, ...] = batch_data
        batch_one_hot_to_feed[0:cur_batch_size, :] = batch_one_hot_vec

        # Run one batch inference
        batch_output, batch_center_pred, \
        batch_hclass_pred, batch_hres_pred, \
        batch_sclass_pred, batch_sres_pred, batch_scores = \
            inference(sess, ops, batch_data_to_feed,
                batch_one_hot_to_feed, batch_size=batch_size)

        for i in range(cur_batch_size):
            ps_list.append(batch_data[i, ...])
            segp_list.append(batch_output[i, ...])
            center_list.append(batch_center_pred[i, :])
            heading_cls_list.append(batch_hclass_pred[i])
            heading_res_list.append(batch_hres_pred[i])
            size_cls_list.append(batch_sclass_pred[i])
            size_res_list.append(batch_sres_pred[i, :])
            rot_angle_list.append(batch_rot_angle[i])
            #score_list.append(batch_scores[i])
            score_list.append(batch_rgb_prob[i])  # 2D RGB detection score
            onehot_list.append(batch_one_hot_vec[i])

    if FLAGS.dump_result:
        with open(output_filename, 'wb') as fp:
            pickle.dump(ps_list, fp)
            pickle.dump(segp_list, fp)
            pickle.dump(center_list, fp)
            pickle.dump(heading_cls_list, fp)
            pickle.dump(heading_res_list, fp)
            pickle.dump(size_cls_list, fp)
            pickle.dump(size_res_list, fp)
            pickle.dump(rot_angle_list, fp)
            pickle.dump(score_list, fp)
            pickle.dump(onehot_list, fp)

    # Write detection results for KITTI evaluation
    print('Number of point clouds: %d' % (len(ps_list)))
    write_detection_results(result_dir, TEST_DATASET.id_list,
                            TEST_DATASET.type_list, TEST_DATASET.box2d_list,
                            center_list, heading_cls_list, heading_res_list,
                            size_cls_list, size_res_list, rot_angle_list,
                            score_list)
    # Make sure for each frame (no matter if we have measurement for that frame),
    # there is a TXT file
    output_dir = os.path.join(result_dir, 'data')
    if FLAGS.idx_path is not None:
        to_fill_filename_list = [line.rstrip()+'.txt' \
            for line in open(FLAGS.idx_path)]
        fill_files(output_dir, to_fill_filename_list)
Example #41
def test(output_filename, result_dir=None):
    ''' Test frustum pointnets with GT 2D boxes.
    Write test results to KITTI format label files.
    todo (rqi): support variable number of points.
    '''
    ps_list = []
    seg_list = []
    segp_list = []
    center_list = []
    heading_cls_list = []
    heading_res_list = []
    size_cls_list = []
    size_res_list = []
    rot_angle_list = []
    score_list = []

    test_idxs = np.arange(0, len(TEST_DATASET))
    batch_size = BATCH_SIZE
    num_batches = len(TEST_DATASET) // batch_size

    sess, ops = get_session_and_ops(batch_size=batch_size, num_point=NUM_POINT)
    correct_cnt = 0
    for batch_idx in range(num_batches):
        print('batch idx: %d' % (batch_idx))
        start_idx = batch_idx * batch_size
        end_idx = (batch_idx + 1) * batch_size

        batch_data, batch_label, batch_center, \
        batch_hclass, batch_hres, batch_sclass, batch_sres, \
        batch_rot_angle, batch_one_hot_vec = \
            get_batch(TEST_DATASET, test_idxs, start_idx, end_idx,
                NUM_POINT, NUM_CHANNEL)

        batch_output, batch_center_pred, \
        batch_hclass_pred, batch_hres_pred, \
        batch_sclass_pred, batch_sres_pred, batch_scores = \
            inference(sess, ops, batch_data,
                batch_one_hot_vec, batch_size=batch_size)

        correct_cnt += np.sum(batch_output == batch_label)

        for i in range(batch_output.shape[0]):
            ps_list.append(batch_data[i, ...])
            seg_list.append(batch_label[i, ...])
            segp_list.append(batch_output[i, ...])
            center_list.append(batch_center_pred[i, :])
            heading_cls_list.append(batch_hclass_pred[i])
            heading_res_list.append(batch_hres_pred[i])
            size_cls_list.append(batch_sclass_pred[i])
            size_res_list.append(batch_sres_pred[i, :])
            rot_angle_list.append(batch_rot_angle[i])
            score_list.append(batch_scores[i])

    print("Segmentation accuracy: %f" % \
        (correct_cnt / float(batch_size*num_batches*NUM_POINT)))

    if FLAGS.dump_result:
        with open(output_filename, 'wb') as fp:
            pickle.dump(ps_list, fp)
            pickle.dump(seg_list, fp)
            pickle.dump(segp_list, fp)
            pickle.dump(center_list, fp)
            pickle.dump(heading_cls_list, fp)
            pickle.dump(heading_res_list, fp)
            pickle.dump(size_cls_list, fp)
            pickle.dump(size_res_list, fp)
            pickle.dump(rot_angle_list, fp)
            pickle.dump(score_list, fp)

    # Write detection results for KITTI evaluation
    write_detection_results(result_dir, TEST_DATASET.id_list,
                            TEST_DATASET.type_list, TEST_DATASET.box2d_list,
                            center_list, heading_cls_list, heading_res_list,
                            size_cls_list, size_res_list, rot_angle_list,
                            score_list)
def save_testData(test_sets, path=MODEL_PATH):
    obj = {"data": test_sets}
    with bz2.BZ2File(path + "test_data.pbz2", "w") as f:
        pickle.dump(obj, f)
Example #43
d = [2] 
path="./drive/My Drive/HW-1/images"
images=os.listdir(path)

dict_features={}
c=1
for im in images:
    print (c)
    print (im)
   

    im=path+"/"+im
    img = Image.open(im)
    basewidth = 300

    wpercent = (basewidth / float(img.size[0]))
    hsize = int((float(img.size[1]) * float(wpercent)))
    image = img.resize((basewidth, hsize), Image.ANTIALIAS)
  
    dist=get_probdist(image,bins,d)
    dict_features[im]=dist
    
    c=c+1
    


file = open('result', 'wb')
pickle.dump(dict_features, file)
file.close()
file = open('result', 'rb')
print(pickle.load( file))
Example #44
    def __init__(self, dest, msgData):
        self.destUser = dest
        self.msgData = msgData

    def setMessageData(self, text):
        self.msgData = text

    def getMessageData(self):
        return self.msgData

    def converByteToString(self, byteData):
        return byteData.decode("utf-8")

    def converStringToByte(self, strData):
        return strData.encode(encoding='utf_8', errors='strict')

    def isGetMessage(self):
        if self.msgData:
            return "true"

    def getUserInfo(self):
        userInfo = (self.sendUser, self.destUser)
        return userInfo


msg = Message("khy4701", "1234")
data_string = cPickle.dumps(msg.getMessageData())

print('pickle :', data_string)
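
For completeness, a minimal round-trip sketch (assuming the same cPickle alias): dumps() returns an in-memory bytes object, so loads() restores it without any file handle.

restored = cPickle.loads(data_string)
print('unpickled :', restored)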
    def __init__(
        self,
        task: str,
        dataroot: str,
        annotations_jsonpath: str,
        split: str,
        image_features_reader: ImageFeaturesH5Reader,
        gt_image_features_reader: ImageFeaturesH5Reader,
        tokenizer: BertTokenizer,
        bert_model,
        padding_index: int = 0,
        max_seq_length: int = 20,
        max_region_num: int = 37,
        hard_neg: bool = False,
    ):
        # All the keys in `self._entries` would be present in `self._image_features_reader`

        self._entries, self.imgid2entry = _load_annotations(
            annotations_jsonpath)
        self.image_id_list = [*self.imgid2entry]

        self._image_features_reader = image_features_reader
        self._tokenizer = tokenizer
        self.num_labels = 1
        self._split = split
        self._padding_index = padding_index
        self._max_region_num = max_region_num
        self._max_seq_length = max_seq_length
        self._hard_neg = hard_neg

        if self._split == "train" and self._hard_neg:
            image_info = cPickle.load(
                open(os.path.join(dataroot, "hard_negative" + ".pkl"), "rb"))
            for key, value in image_info.items():
                setattr(self, key, value)
            self.train_imgId2pool = {
                imageId: i
                for i, imageId in enumerate(self.train_image_list)
            }

        if "roberta" in bert_model:
            cache_path = os.path.join(
                dataroot,
                "cache",
                task + "_" + split + "_" + "roberta" + "_" +
                str(max_seq_length) + ".pkl",
            )
        else:
            cache_path = os.path.join(
                dataroot,
                "cache",
                task + "_" + split + "_" + str(max_seq_length) + ".pkl",
            )

        if not os.path.exists(cache_path):
            self.tokenize()
            self.tensorize()
            cPickle.dump(self._entries, open(cache_path, "wb"))
        else:
            print("loading entries from %s" % (cache_path))
            self._entries = cPickle.load(open(cache_path, "rb"))
Example #46
def save_params(iter, params):
    #with open("saved_params_%d.npy" % iter, "w") as f:
    with open("saved_params_%d.npy" % iter, "wb") as f:
        pickle.dump(params, f)
        pickle.dump(random.getstate(), f)
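
A plausible load counterpart (the name load_saved_params is hypothetical): the two objects must be read back in the order they were dumped, and the captured RNG state can be restored with random.setstate().

def load_saved_params(iter):
    with open("saved_params_%d.npy" % iter, "rb") as f:
        params = pickle.load(f)   # first dump: the parameters
        state = pickle.load(f)    # second dump: random.getstate()
    random.setstate(state)
    return params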
Example #47
def test(testID):
    print('\nTesting:')
    total_test_loss = 0
    total_test_loss_length = 0
    total_test_loss_number = 0

    confusion_matrix = np.zeros((num_classes, num_classes))

    bar = progressbar.ProgressBar(maxval=test_set_size, redirect_stdout=False)
    for i, test_song in enumerate(X_test):

        X = I_test[i]
        X = np.expand_dims(X, 0)
        num_samples = X.shape[0]
        c = C_test[i]
        Y = np.asarray(to_categorical(c, num_classes=num_classes))

        scores = model.evaluate(X, Y, batch_size=batch_size, verbose=verbose)
        if reset_states:
            model.reset_states()
        total_test_loss += scores[0]

        Y_predicted = model.predict(X, batch_size=batch_size, verbose=verbose)
        for y_val, y_predicted in zip(Y, Y_predicted):
            y_class_test = np.argmax(y_val)
            y_class_predicted = np.argmax(y_predicted)
            confusion_matrix[y_class_predicted, y_class_test] += 1

        bar.update(i + 1)

    accuracy = np.sum(np.diagonal(confusion_matrix)) / np.sum(confusion_matrix)
    total_test_loss_array.append(total_test_loss / test_set_size)
    total_test_accuracy_array.append(accuracy)
    print('\nTotal test loss: ', total_test_loss / test_set_size)
    print('Total accuracy: ' + str(accuracy * 100) + "%")
    print('-' * 50)
    plt.figure()
    plt.title('Style classification on instrument information')
    plt.plot(total_test_loss_array, label='Total test loss')
    plt.plot(total_train_loss_array, label='Total train loss')
    plt.plot(total_test_accuracy_array, label='Total test accuracy')
    plt.plot(total_train_accuracy_array, label='Total train accuracy')
    plt.legend(loc='lower left', prop={'size': 8})
    if show_plot: plt.show()
    if save_plot:
        plt.savefig(model_path + t + 'instrument_train.png')
        tikz_save(model_path + t + 'instrument_train.tex',
                  encoding='utf-8',
                  show_info=False)
    pickle.dump(total_test_loss_array,
                open(model_path + 'total_test_loss_array.pickle', 'wb'))
    pickle.dump(total_test_accuracy_array,
                open(model_path + 'total_test_accuracy_array.pickle', 'wb'))
    pickle.dump(total_train_accuracy_array,
                open(model_path + 'total_train_accuracy_array.pickle', 'wb'))
    pickle.dump(total_train_loss_array,
                open(model_path + 'total_train_loss_array.pickle', 'wb'))

    if testID % save_step == 0:
        confusion_matrix = confusion_matrix / confusion_matrix.sum(
            axis=1, keepdims=True)
        plt.figure()
        plt.imshow(confusion_matrix, interpolation='nearest')
        plt.title('Total accuracy: ' + str(accuracy * 100) + '%')
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.xticks(np.arange(0, num_classes), classes)
        plt.yticks(np.arange(0, num_classes), classes)
        plt.colorbar()
        if show_plot: plt.show()
        if save_plot:
            plt.savefig(model_path + 'confusion_matrix' + str(testID) + '.png')
            tikz_save(model_path + 'confusion_matrix' + str(testID) + '.tex',
                      encoding='utf-8',
                      show_info=False)
Example #48
def test_create_profile_data():
    df = gathering_profile_data()
    with open("../files/pickles/profiles.pkl", 'wb') as wb:
        df.to_csv(r"../files/csv/profiles.csv", encoding='utf-8')
        pickle.dump(df, wb)
Example #49
def serialize_testdata(testdata, path='testdata.pkl'):
    with open(path, 'wb') as file:
        pkl.dump(testdata, file)
Example #50
file = sys.argv[1]
data = open(file, 'rb').read()
data = data.decode('utf-8')

chars = list(set(data))  # char vocabulary

data_size, _vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, _vocab_size))
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

config = Config.Config()
config.vocab_size = _vocab_size

cPickle.dump((char_to_idx, idx_to_char), open(config.model_path + '.voc', 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)

context_of_idx = [char_to_idx[ch] for ch in data]


def data_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)

    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]  # data has shape (batch_size, batch_len); each row is one contiguous segment, so several segments can be fed at once

    epoch_size = (batch_len - 1) // num_steps
Example #51
import os
import numpy as np
import _pickle as cPickle

states_num = 880
fold_path = os.path.join(os.getcwd(), os.path.pardir, os.path.pardir,
                         'Weights')
file_path = os.path.join(fold_path, "q.pkl")
if not os.path.exists(file_path):
    print('[ERROR] The q file %s does not exist' % (file_path))
else:
    with open(file_path, 'rb') as readfile:
        q = cPickle.load(readfile)
    print('[INFO] Successfully loaded q file from %s' % (file_path))
    q[880:891, :] = q[states_num, :]

    #q_ = q[states_num, :]
    #q_max = q_.max()
    #q_index = np.where(q_ == q_max)
    #print("weight for state %d is:" % (states_num), q_)
    #print("length:", len(q_))
    #print("max:", q_max)
    #print("index:", q_index)
    #print(q.shape)
    #print(q_[252])
    with open(file_path, 'wb') as writefile:
        cPickle.dump(q, writefile)
        print('[INFO] Successfully saved q file to %s' % (file_path))
# The gradient boosted model by itself
y_pred_grd = grd.predict_proba(X_test)[:, 1]
fpr_grd, tpr_grd, _ = roc_curve(y_test, y_pred_grd)
'''

# The random forest model by itself
rf.fit(X_train, y_train)
print(rf.predict(X_test))
print(rf.score(X_test, y_test))
#cnf_matrix = confusion_matrix(y_test, y_pred)

with open(
        'C:/aditya/Thermal_images/ai_2.0/hog_detectors/hog_rf_F_NF_100pad5_in127_py3',
        'wb') as f:
    cPickle.dump(rf, f)

y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
roc_auc = auc(fpr_rf, tpr_rf)
plt.figure(1)
lw = 2
plt.plot([0, 1], [0, 1], 'k--')
#plt.plot(fpr_rt_lm, tpr_rt_lm, label='RT + LR')
plt.plot(fpr_rf, tpr_rf, lw=lw, label='RF (area = %0.2f)' % roc_auc)
#plt.plot(fpr_rf_lm, tpr_rf_lm, label='RF + LR')
#plt.plot(fpr_grd, tpr_grd, label='GBT')
#plt.plot(fpr_grd_lm, tpr_grd_lm, label='GBT + LR')
plt.xlabel('False positive rate', fontsize=22)
plt.ylabel('True positive rate', fontsize=22)
plt.title('ROC curve foot classifier', fontsize=22)
Example #53
 question_id = 0
 for idx, question in enumerate(train_questions):
     target.append({
         'question_id': question_id,
         'image_id': train_images[idx],
         'labels': ans2label[train_answers[idx]],
         'types': train_types[idx]
     })
     train_question_json.append({
         'question': question,
         'question_id': question_id,
         'image_id': train_images[idx]
     })
     question_id += 1
 cache_file = os.path.join(cache_root, 'coco_train_target.pkl')
 cPickle.dump(target, open(cache_file, 'wb'))
 with open("coco_train_question.json", 'w') as file_object:
     json.dump(train_question_json, file_object)
 target = []
 for idx, question in enumerate(test_questions):
     target.append({
         'question_id': question_id,
         'image_id': test_images[idx],
         'labels': ans2label[test_answers[idx]],
         'types': test_types[idx]
     })
     val_question_json.append({
         'question': question,
         'question_id': question_id,
         'image_id': test_images[idx]
     })
Example #54
 def save_dir(self):
     # use a context manager so the file handle is closed after the dump
     with open('label_dict', 'wb') as f:
         cPickle.dump(self.label_dict, f)
Example #55
    def merge_dic(self):

        list_merge = []
        dic_merge = []
        print('num is %d' % self.num)
        '''compare and merge dictionaries'''
        for m in range(1, self.num + 1):

            if m == 1:
                print('enter list1, dic1')
                f1 = open('dic\\keylist1.txt', 'rb')
                f2 = open('dic\\dic1.txt', 'rb')
                k1 = cPickle.load(f1)
                d1 = cPickle.load(f2)
                list_merge = k1
                dic_merge = d1
                f1.close()
                f2.close()
#                 print('list:%d  dic:%d'%(len(list_merge), len(dic_merge)))
#                 print(list_merge)
#                 print(dic_merge)

            else:
                print('enter list%d dic%d' % (m, m))
                f1 = open('dic\\keylist%d.txt' % m, 'rb')
                f2 = open('dic\\dic%d.txt' % m, 'rb')
                k1 = cPickle.load(f1)
                d1 = cPickle.load(f2)
                f1.close()
                f2.close()
                k2 = list_merge
                d2 = dic_merge
                n1 = len(k1)
                n2 = len(k2)
                list_merge = []
                dic_merge = {}
                i = 0
                j = 0

                # two-pointer merge of the two sorted key lists; counts for keys present in both are summed
                while i < n1 and j < n2:
                    if k1[i] < k2[j]:
                        list_merge.append(k1[i])
                        dic_merge[k1[i]] = d1[k1[i]]
                        i += 1
                    elif k1[i] > k2[j]:
                        list_merge.append(k2[j])
                        dic_merge[k2[j]] = d2[k2[j]]
                        j += 1
                    else:
                        list_merge.append(k1[i])
                        dic_merge[k1[i]] = d2[k2[j]] + d1[k1[i]]
                        i += 1
                        j += 1

                if i >= n1:
                    for k in range(j, n2):
                        list_merge.append(k2[k])
                        dic_merge[k2[k]] = d2[k2[k]]
                if j >= n2:
                    for k in range(i, n1):
                        list_merge.append(k1[k])
                        dic_merge[k1[k]] = d1[k1[k]]


#                 print('i:%d  j:%d  n1:%d  n2:%d  k:%d'%(i,j,n1,n2,k))
#                 print(list_merge)
#                 print(dic_merge)

        print('merge all')
        #         print('list:%d'%(len(list_merge)))
        #         print(list_merge)
        #         print(dic_merge)
        '''write the merged dictionary to disk'''
        filename = 'dic\\dictionary.txt'
        f = open(filename, "wb")
        cPickle.dump(dic_merge, f)
        f.close()
        '''write the merged key list to disk'''
        filename = 'dic\\keys.txt'
        f = open(filename, "wb")
        cPickle.dump(list_merge, f)
        f.close()
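A short read-back sketch; load_merged is a hypothetical helper, assuming the same dic\ output paths and the cPickle import used by merge_dic:
    def load_merged(self):
        # Reload the merged dictionary and its sorted key list written by merge_dic.
        with open('dic\\dictionary.txt', 'rb') as f:
            dic_merge = cPickle.load(f)
        with open('dic\\keys.txt', 'rb') as f:
            list_merge = cPickle.load(f)
        return list_merge, dic_merge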
Ejemplo n.º 56
0
def main():
    args = parse_args()

    # Devices
    if args.local_rank == -1:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        torch.distributed.init_process_group(backend="nccl")
    default_gpu = False
    if dist.is_available() and args.local_rank != -1:
        rank = dist.get_rank()
        if rank == 0:
            default_gpu = True
    else:
        default_gpu = True
    logger.info(
        f"device: {device} n_gpu: {n_gpu}, distributed training: {bool(args.local_rank != -1)}"
    )

    # Load config
    config = BertConfig.from_json_file(args.config_file)

    # Load task config
    with open(args.tasks_config_file, "r") as f:
        task_cfg = edict(yaml.safe_load(f))
    task_id = args.task.strip()
    task = "TASK" + task_id
    task_name = task_cfg[task]["name"]
    if task_cfg[task].get("fusion_method", None):
        # VL-BERT pooling for VQA
        config.fusion_method = task_cfg[task]["fusion_method"]

    # Output dirs
    savePath = args.output_dir
    if default_gpu and not os.path.exists(savePath):
        os.makedirs(savePath)

    # Seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Dataset
    feats_h5path = task_cfg[task]["features_h5path1"]
    features_reader = ImageFeaturesH5Reader(feats_h5path, config,
                                            args.in_memory)
    batch_size = task_cfg[task]["batch_size"]
    num_workers = args.num_workers
    if args.local_rank != -1:
        batch_size = int(batch_size / dist.get_world_size())
        num_workers = int(num_workers / dist.get_world_size())
    logger.info("Loading %s Dataset with batch size %d" %
                (task_name, batch_size))
    eval_split = args.split or task_cfg[task]["val_split"]
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    dset = FlickrVis4LangDataset(
        task,
        task_cfg[task]["dataroot"],
        args.masking,
        eval_split,
        features_reader,
        None,
        tokenizer,
        args.bert_model,
        max_seq_length=task_cfg[task]["max_seq_length"],
        max_region_num=task_cfg[task]["max_region_num"],
        num_locs=config.num_locs,
        threshold=args.overlap_threshold,
        add_global_imgfeat=config.add_global_imgfeat)
    dl = DataLoader(dset,
                    shuffle=False,
                    batch_size=batch_size,
                    num_workers=num_workers,
                    pin_memory=True)

    # Model
    config.visual_target_weights = {}
    model = BertForVLPreTraining.from_pretrained(args.from_pretrained,
                                                 config=config)

    # Move to GPU(s)
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training."
            )
        model = DDP(model, delay_allreduce=True)
    elif n_gpu > 1:
        model = nn.DataParallel(model)

    # Print summary
    if default_gpu:
        print("***** Running evaluation *****")
        print("  Num Iters: ", len(dl))
        print("  Batch size: ", batch_size)

    # Evaluate
    model.eval()
    loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
    phrase_ids, image_ids, pred_tokens, true_tokens, pred_scores, lm_losses = [], [], [], [], [], []
    for batch in tqdm(dl, total=len(dl)):
        image_id = batch[-1]
        batch = batch[:-1]
        if device.type != 'cpu':
            batch = tuple(
                t.cuda(device=device, non_blocking=True) for t in batch)
        phrase_id, caption, input_mask, segment_ids, lm_label_ids, features, spatials, image_cls, \
            obj_labels, obj_confs, attr_labels, attr_confs, image_attrs, image_mask, image_labels = batch

        with torch.no_grad():
            predictions_t, _, _, _, _ = model(caption,
                                              features,
                                              spatials,
                                              token_type_ids=segment_ids,
                                              attention_mask=input_mask,
                                              image_attention_mask=image_mask,
                                              masked_lm_labels=None,
                                              image_label=None,
                                              image_cls=image_cls,
                                              obj_labels=obj_labels,
                                              obj_confs=obj_confs,
                                              attr_labels=attr_labels,
                                              attr_confs=attr_confs,
                                              image_attrs=image_attrs)

            # loss = masked_loss_t + masked_loss_v + pair_match_loss
            target_ixs = [[] for _ in range(predictions_t.size(0))]
            xs, ys = torch.where(lm_label_ids != -1)
            for x, y in zip(xs, ys):
                target_ixs[x].append(y.item())
            for bix in range(predictions_t.size(0)):
                pred_bix_tokens, true_bix_tokens, bix_predictions = [], [], []
                for masked_ix in target_ixs[bix]:
                    predicted_index = torch.argmax(
                        predictions_t[bix, masked_ix]).item()
                    predicted_token = tokenizer.convert_ids_to_tokens(
                        [predicted_index])[0]
                    label_token = tokenizer.convert_ids_to_tokens(
                        [lm_label_ids[bix, masked_ix].item()])[0]
                    pred_bix_tokens.append(predicted_token)
                    true_bix_tokens.append(label_token)
                    bix_predictions.append(
                        predictions_t[bix, masked_ix].cpu().numpy())  # move to CPU before converting to numpy
                masked_lm_loss = loss_fct(
                    predictions_t[bix].view(-1, config.vocab_size),
                    lm_label_ids[bix].view(-1),
                ).unsqueeze(0).item()

                if args.dump_results:
                    # pred_tokens.append(pred_bix_tokens)
                    # true_tokens.append(true_bix_tokens)
                    # pred_scores.append(bix_predictions)
                    # image_ids.append(image_id[bix].item())
                    # phrase_ids.append(phrase_id[bix].item())
                    lm_losses.append(masked_lm_loss)

    if default_gpu:
        print("MLM:", np.mean(np.array(lm_losses)))

        if args.dump_results:
            eval_path = os.path.join(savePath, eval_split)
            masking_str = args.masking if args.masking != "ref" else args.masking + str(
                args.overlap_threshold)
            # cPickle.dump(pred_tokens, open(eval_path + "_%s_preds.pkl" % masking_str, "wb"))
            # cPickle.dump(true_tokens, open(eval_path + "_%s_truth.pkl" % masking_str, "wb"))
            # cPickle.dump(pred_scores, open(eval_path + "_%s_score.pkl" % masking_str, "wb"))
            # cPickle.dump(image_ids, open(eval_path + "_%s_imgids.pkl" % masking_str, "wb"))
            # cPickle.dump(phrase_ids, open(eval_path + "_%s_phrids.pkl" % masking_str, "wb"))
            cPickle.dump(lm_losses,
                         open(eval_path + "_%s_mlm.pkl" % masking_str, "wb"))
def TrainModel():
    global vocab_index 
    vocab_index={}
    global vocab_invindex 
    vocab_invindex={}
    threadlist=[]
    #LearnVocabFromTrainFile()
    

    InitNet()
    print("initialize finished")
    if(negative>0):
        table=pickle.load( open( "table.p", "rb" ) )
        #InitUnigramTable()
        print("Table finished")

    print("train starts")   
    try:
        """
        thread1=threading.Thread(target=TrainModelThread, args=(0,))
        thread2=threading.Thread(target=TrainModelThread, args=(1,))
        thread3=threading.Thread(target=TrainModelThread, args=(2,))

        thread1.start()
        thread2.start()
        thread3.start()

        thread1.join()
        thread2.join()
        thread3.join()

        """
        
        for i in range(worker):
            threadlist.append(threading.Thread(target=TrainModelThread, args=(i,)))
        
        for i in range(worker):
            threadlist[i].start()

        for i in range(worker):
            threadlist[i].join()
        
    except Exception as exc:
        print("Error: unable to start thread:", exc)

    # All worker threads have been joined above, so training is finished at this point.
    print("train finished")

    

    # build the word -> index lookup
    for i in range(vocab_size):
        vocab_index[vocab[i].word]=i

    # build the inverse lookup (index -> word)
    for i in range(vocab_size):
        vocab_invindex[i]=vocab[i].word
    
    #save with pickle

    pickle.dump( syn0, open( "WeightMatrix.p", "wb" ) )
    pickle.dump( vocab_index, open( "vocab_index.p", "wb" ) )
    pickle.dump( vocab_invindex, open( "vocab_invindex.p", "wb" ) )
    
    return vocab_index,vocab_invindex
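A small read-back sketch, not part of the script above: it assumes the pickles written by TrainModel and that syn0 was saved as a (vocab_size, embedding_dim) array.
import pickle

syn0 = pickle.load(open("WeightMatrix.p", "rb"))        # learned weight matrix
vocab_index = pickle.load(open("vocab_index.p", "rb"))  # word -> row index

def word_vector(word):
    # Return the embedding row for a word that exists in the vocabulary.
    return syn0[vocab_index[word]]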
Ejemplo n.º 58
0
config = neat.config.Config(
    neat.genome.DefaultGenome,
    neat.reproduction.DefaultReproduction,
    neat.species.DefaultSpeciesSet,
    neat.stagnation.DefaultStagnation,
    'config_neat_pole_balancing')


# Use the gym_runner to run this experiment using NEAT.
def run(gens, env):
    winner, stats = run_neat(gens, env, 500, config)
    print("neat_pole_balancing done")
    return winner, stats


# If run as script.
if __name__ == '__main__':
    # Setup logger and environment.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    env = gym.make("CartPole-v1")

    # Run!
    winner = run(500, env)[0]

    # Save the net in case it should be reused later, and draw it (and the winner) to file.
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    draw_net(winner_net, filename="neat_pole_balancing_winner")
    with open('neat_pole_balancing_winner.pkl', 'wb') as output:
        pickle.dump(winner_net, output)
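A minimal reload sketch, assuming the pickle written above and the older gym API in which env.reset() returns only the observation; thresholding a single network output at 0.5 is also an assumption:
    with open('neat_pole_balancing_winner.pkl', 'rb') as f:
        net = pickle.load(f)

    obs = env.reset()                          # older gym API: reset() returns the observation
    action = int(net.activate(obs)[0] > 0.5)   # assumes one output neuron, thresholded at 0.5
    print("first action:", action)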
Ejemplo n.º 59
0
def xo_compressed_pickle(title, data):
    with bz2.BZ2File(title + '.pbz2', 'w') as f:
        cPickle.dump(data, f)
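A matching loader sketch; xo_decompress_pickle is a hypothetical name, assuming the same .pbz2 naming convention and the bz2/cPickle imports used above:
def xo_decompress_pickle(title):
    # Read back an object written by xo_compressed_pickle.
    with bz2.BZ2File(title + '.pbz2', 'rb') as f:
        return cPickle.load(f)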
Ejemplo n.º 60
0
digits_test_images_flat = digits_test[:, :(-1)]
digits_test_images = digits_test_images_flat.view()
digits_test_images.shape = ((-1), 8, 8)
digits_test_target = digits_test[:, (-1)].astype(int)  # np.int was removed in NumPy 1.24; use the builtin int

images_and_labels = list(zip(digits_images, digits_target))

n_samples = len(digits_images)

classifier = svm.SVC(gamma=0.001, kernel='linear')

classifier.fit(digits_images_flat, digits_target)

expected = digits_test_target
predicted = classifier.predict(digits_test_images_flat)
print('Classification report for classifier %s:\n%s\n' %
      (classifier, metrics.classification_report(expected, predicted)))
print('Confusion matrix:\n%s' % metrics.confusion_matrix(expected, predicted))
print("accuracy:", metrics.accuracy_score(expected, predicted))

images_and_predictions = list(zip(digits_test_images, predicted))

np.savetxt('output.txt', classifier.decision_function(digits_test_images_flat))

outputData = {'data_array': metrics.confusion_matrix(expected, predicted)}

with open('output.pkl', 'wb') as outputFile:
    cPickle.dump(outputData, outputFile)

with open('model.pkl', 'wb') as modelFile:
    cPickle.dump(classifier, modelFile)
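A short read-back sketch, assuming the files written above and that the test arrays from this snippet are still in scope:
with open('model.pkl', 'rb') as modelFile:
    restored = cPickle.load(modelFile)
print("restored accuracy:", restored.score(digits_test_images_flat, digits_test_target))

with open('output.pkl', 'rb') as outputFile:
    print(cPickle.load(outputFile)['data_array'])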