Example no. 1
def main(_):

	print("Parameters: ")
	for k, v in FLAGS.__flags.items():
		print("{} = {}".format(k, v))

	if not os.path.exists("./prepro/"):
		os.makedirs("./prepro/")

	if FLAGS.prepro:
		img_feat, tags_idx, a_tags_idx, vocab_processor = data_utils.load_train_data(FLAGS.train_dir, FLAGS.tag_path, FLAGS.prepro_dir, FLAGS.vocab)	
	else:
		img_feat = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "img_feat.dat"), 'rb'))
		tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "tag_ids.dat"), 'rb'))
		a_tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "a_tag_ids.dat"), 'rb'))
		vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)
	img_feat = np.array(img_feat, dtype='float32')/127.5 - 1.
	test_tags_idx = data_utils.load_test(FLAGS.test_path, vocab_processor)

	print("Image feature shape: {}".format(img_feat.shape))
	print("Tags index shape: {}".format(tags_idx.shape))
	print("Attribute Tags index shape: {}".format(a_tags_idx.shape))
	print("Vocab size: {}".format(len(vocab_processor._reverse_mapping)))
	print("Vocab max length: {}".format(vocab_processor.max_document_length))
	
	data = Data(img_feat, tags_idx, a_tags_idx, test_tags_idx, FLAGS.z_dim, vocab_processor)

	Model = getattr(sys.modules[__name__], FLAGS.model)	
	print(Model)

	model = Model(data, vocab_processor, FLAGS)
	
	model.build_model()
	
	model.train()
Example no. 2
def loadlevel():
    dellevel()
    for sp_overlay in store.store['spo']:
        sp_overlay.delete()
    del store.store['spo'][:]
    del store.store['gp'][:]
    loadlev = open('saved_level','rb')
    store.store['gt'] = cPickle.load(loadlev)
    store.store['gp'] = cPickle.load(loadlev)
    for g_tile in store.store['gt']:
        sp_tile = Sp_Tile(x=clevel.ct(g_tile.coor[0]), y=clevel.ct(g_tile.coor[1]),
                          img=getim(g_tile),
                          bt= store.map_batch,id=g_tile.id)
        sp_tile.rotation=g_tile.rot
        store.store['spt'].append(sp_tile)
        if g_tile.overlays:
            for ol in g_tile.overlays:
                sp_overlay = Sp_Tile(x=clevel.ct(ol.x),y=clevel.ct(ol.y),
                                     img=getim(ol),
                                     bt=store.item_batch,id=ol.id,
                                     ol=True)
                store.store['spo'].append(sp_overlay)
    for g_player in store.store['gp']:
        sp_overlay = Sp_Tile(x=clevel.ct(g_player.coor[0]),
                             y=clevel.ct(g_player.coor[1]),
                             img = getim(g_player),
                             id= g_player.id,
                             bt = store.player_batch)
        store.store['spo'].append(sp_overlay)
    loadlev.close()
Example no. 3
def get_camp_info(camp, src="ipinyou"):
	if src == "ipinyou":
		info = pickle.load(open(ipinyouPath + camp + "/info.txt", "rb"))
	elif src == "vlion":
		info = pickle.load(open(vlionPath + camp + "/info.txt", "rb"))
	elif src == "yoyi":
		info = pickle.load(open(yoyiPath + camp + "/info.txt", "rb"))
	return info
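An equivalent, slightly tighter variant, shown only as a sketch (it assumes the same module-level ipinyouPath, vlionPath and yoyiPath variables; get_camp_info_v2 is a hypothetical name so the original is not shadowed):

def get_camp_info_v2(camp, src="ipinyou"):
	# map each source to its base path, then load the pickled campaign info
	paths = {"ipinyou": ipinyouPath, "vlion": vlionPath, "yoyi": yoyiPath}
	with open(paths[src] + camp + "/info.txt", "rb") as f:
		return pickle.load(f)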
 def load(self, filename):
     try:
         file = open(filename, 'rb') 
         if len(self.buffer) == 0:
             self.buffer = pickle.loads(pickle.load(file))
         else:
             buf = pickle.loads(pickle.load(file))
             self.merge(buf)
         file.close() 
         return True
     except Exception as e:
         return False
def preprocess(words_file = "tools/word_data.pkl", authors_file="tools/email_authors.pkl"):
    """ 
        this function takes a pre-made list of email texts (by default word_data.pkl)
        and the corresponding authors (by default email_authors.pkl) and performs
        a number of preprocessing steps:
            -- splits into training/testing sets (10% testing)
            -- vectorizes into tfidf matrix
            -- selects/keeps most helpful features

        after this, the features and labels are put into numpy arrays, which play nice with sklearn functions

        4 objects are returned:
            -- training/testing features
            -- training/testing labels

    """

    ### the words (features) and authors (labels), already largely preprocessed
    ### this preprocessing will be repeated in the text learning mini-project
    authors_file_handler = open(authors_file, "rb")
    authors = cPickle.load(authors_file_handler)
    authors_file_handler.close()

    words_file_handler = open(words_file, "rb")
    word_data = cPickle.load(words_file_handler)
    words_file_handler.close()

    ### test_size is the percentage of events assigned to the test set
    ### (remainder go into training)
    features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)



    ### text vectorization--go from strings to lists of numbers
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                 stop_words='english')
    features_train_transformed = vectorizer.fit_transform(features_train)
    features_test_transformed  = vectorizer.transform(features_test)



    ### feature selection, because text is super high dimensional and 
    ### can be really computationally chewy as a result
    selector = SelectPercentile(f_classif, percentile=1)
    selector.fit(features_train_transformed, labels_train)
    features_train_transformed = selector.transform(features_train_transformed).toarray()
    features_test_transformed  = selector.transform(features_test_transformed).toarray()

    ### info on the data
    print ("no. of Chris training emails:", sum(labels_train))
    print ("no. of Sara training emails:", len(labels_train)-sum(labels_train))
    
    return features_train_transformed, features_test_transformed, labels_train, labels_test
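A minimal usage sketch (assumptions: the default pickle files exist at the paths above and scikit-learn is installed; the GaussianNB classifier is purely illustrative and not part of the original snippet):

from sklearn.naive_bayes import GaussianNB  # illustrative classifier choice

features_train, features_test, labels_train, labels_test = preprocess()
clf = GaussianNB()
clf.fit(features_train, labels_train)  # train on the selected tf-idf features
print("test accuracy:", clf.score(features_test, labels_test))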
Example no. 6
    def __init__(self, master, main_window):
        ttk.Frame.__init__(self, master)
        self.window = main_window
        self.major_components = ["PrimaryWeapon", "PrimaryWeapon2", "SecondaryWeapon", "SecondaryWeapon2", "Systems"]
        self.middle_components = ["Engine", "ShieldProjector"]
        self.minor_components = ["Magazine", "Capacitor", "Reactor", "Armor", "Sensor", "Thruster"]
        # Open all required databases
        self.icons_path = path.abspath(path.join(path.dirname(path.realpath(__file__)), "..", "assets", "icons"))
        with open(path.join(get_assets_directory(), "ships.db"), "rb") as f:
            # Contains data on the components
            self.ships_data = pickle.load(f)
        with open(path.join(get_assets_directory(), "categories.db"), "rb") as f:
            # Contains data on the ships (specifically descriptions and the like)
            self.categories_data = pickle.load(f)
        with open(path.join(get_assets_directory(), "companions.db"), "rb") as f:
            # Contains data on the Crew members
            self.companions_data = pickle.load(f)
        # ScrollFrame to contain the component lists (ToggledFrames) and the CrewSelectFrame
        self.components_lists_frame = VerticalScrollFrame(self, canvaswidth=260, canvasheight=315)

        self.ship_select_frame = ShipSelectFrame(self, self.set_ship, self.set_faction)
        self.components_lists = OrderedDict()
        self.faction = "Imperial"
        self.category = "Scout"
        self.ship = Ship("Bloodmark")
        self.character = None
        self.ship_name = None
        # Header above the Components ToggledFrames
        self.components_lists_header_label = ttk.Label(
            self.components_lists_frame.interior, text="Components",
            justify=tk.LEFT, font=("Calibiri", 12))
        for category in COMPONENTS:
            # Bloodmark is the default around which the widgets are created
            if category not in self.ships_data["Imperial_S-SC4_Bloodmark"]:
                continue
            self.components_lists[category] = \
                ComponentListFrame(
                    self.components_lists_frame.interior, category,
                    self.ships_data["Imperial_S-SC4_Bloodmark"][category], self.set_component,
                    self.toggle_callback)
        self.component_frame = ttk.Frame(self)
        self.current_component = ComponentWidget(
            self.component_frame, self.ships_data["Imperial_S-SC4_Bloodmark"]["PrimaryWeapon"][0],
            self.ship, "PrimaryWeapon")
        self.crew_select_frame = CrewListFrame(
            self.components_lists_frame.interior, self.faction,
            self.companions_data, self.set_crew_member)
        # Image for on the ShipStats button
        self.ship_stats_image = open_icon("spvp_targettracker", (49, 49))
        self.ship_stats_button = ttk.Button(
            self, text="Show ship statistics", command=self.show_ship_stats,
            image=self.ship_stats_image, compound=tk.LEFT)
        self.reset()
Example no. 7
 def __init__(self, ship: Ship, ships_data: dict, companions_data: dict):
     """
     :param ship: Ship object
     """
     self.stats = dict()
     self.ship = ship
     if ships_data is None:
         with open(os.path.join(get_assets_directory(), "ships.db"), "rb") as fi:
             ships_data = pickle.load(fi)
     if companions_data is None:
         with open(os.path.join(get_assets_directory(), "companions.db"), "rb") as fi:
             companions_data = pickle.load(fi)
     self.ships_data = ships_data.copy()
     self.companions_data = companions_data.copy()
     self.calc_ship_stats()
def xgboost_pred(df_all_file,d_col_drops,n_estimators,learning_rate,max_depth):
    ### Load
    pickle_file = '%s/%s'%(Dir,df_all_file)
    with open(pickle_file, 'rb') as f:
      save = pickle.load(f)
      df_all = save['df_all']
      del save  # hint to help gc free up memory
      print('df_all', df_all.shape)
    ##########################

    df_train = df_all.iloc[:num_train]
    df_test = df_all.iloc[num_train:]
    id_test = df_test['id']
    y_train = df_train['relevance'].values
    #y_train = pd.DataFrame(df_train['relevance'].values,columns=['relevance'])
    X_train =df_train[:]
    X_test = df_test[:]
    print("--- Features Set: %s minutes ---" % round(((time.time() - start_time)/60),2))
    X_train2 = X_train.drop(d_col_drops,axis=1).values

    # Prediction
    xgb_model = xgb.XGBRegressor(n_estimators=n_estimators,learning_rate=learning_rate,max_depth=max_depth,seed=2016,silent=False, nthread=-1, gamma=0.000001, min_child_weight=1, max_delta_step=0,
                 subsample=1, colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
                 base_score=0.5, missing=None)
    xgb_model.fit(X_train2, y_train)

    X_test2 = X_test.drop(d_col_drops, axis=1).values
    y_pred = xgb_model.predict(X_test2)
    #y_pred = [max(1., min(x, 3.)) for x in y_pred]
    pd.DataFrame({"id": id_test, "relevance": y_pred}).to_csv('%s/submission_v6_B_xgboost_same_query_score.csv' % (Dir),
                                                              index=False)
    print("--- Training & Testing: %s minutes ---" % round(((time.time() - start_time) / 60), 2))
    return xgb_model,y_pred
def xgboost_test(df_all_file,d_col_drops,n_estimators,learning_rate,max_depth):
    ### Load
    pickle_file = '%s/%s'%(Dir,df_all_file)
    with open(pickle_file, 'rb') as f:
      save = pickle.load(f)
      df_all = save['df_all']
      del save  # hint to help gc free up memory
      print('df_all', df_all.shape)
    ##########################

    df_train = df_all.iloc[:num_train]
    df_test = df_all.iloc[num_train:]
    id_test = df_test['id']
    y_train = df_train['relevance'].values
    #y_train = pd.DataFrame(df_train['relevance'].values,columns=['relevance'])
    X_train =df_train[:]
    X_test = df_test[:]
    print("--- Features Set: %s minutes ---" % round(((time.time() - start_time)/60),2))
    X_train2 = X_train.drop(d_col_drops,axis=1).values

    ### Custom CV
    from sklearn.cross_validation import train_test_split
    X_train3, X_valid3, y_train3, y_valid3 = train_test_split(X_train2, y_train, test_size=0.2, random_state=2009)
    xgb_model = xgb.XGBRegressor(n_estimators=n_estimators,learning_rate=learning_rate,max_depth=max_depth,seed=2016)
    xgb_model.fit(X_train3, y_train3)

    y_pred = xgb_model.predict(X_valid3)
    y_pred=[max(1.,min(x,3.)) for x in y_pred]

    return xgb_model,y_pred,y_valid3
def method_pred(model,df_all_file,d_col_drops):
    ### Load
    pickle_file = '%s/%s'%(Dir,df_all_file)
    with open(pickle_file, 'rb') as f:
      save = pickle.load(f)
      df_all = save['df_all']
      del save  # hint to help gc free up memory
      print('df_all', df_all.shape)
    ##########################

    df_train = df_all.iloc[:num_train]
    df_test = df_all.iloc[num_train:]
    id_test = df_test['id']
    y_train = df_train['relevance'].values
    #y_train = pd.DataFrame(df_train['relevance'].values,columns=['relevance'])
    X_train =df_train[:]
    X_test = df_test[:]
    print("--- Features Set: %s minutes ---" % round(((time.time() - start_time)/60),2))
    X_train2 = X_train.drop(d_col_drops,axis=1).values
    # Prediction
    model.fit(X_train2, y_train)
    X_test2 = X_test.drop(d_col_drops, axis=1).values
    y_pred = model.predict(X_test2)

    return model, y_pred
Example no. 11
    def load_results(pickle_file):
        '''
        Load in a saved pickle file.

        Parameters
        ----------
        pickle_file : str
            Name of filename to load in.

        Returns
        -------
        self : Save statistic class
            Statistic instance with saved results.

        Examples
        --------
        Load saved results.
        >>> stat = Statistic.load_results("stat_saved.pkl") # doctest: +SKIP

        '''

        with open(pickle_file, 'rb') as input:
                self = pickle.load(input)

        return self
Example no. 12
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = _pickle.load(f, encoding='bytes')
    f.close()
    return (training_data, validation_data, test_data)
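A quick sanity check of the shapes described in the docstring (assumes ../data/mnist.pkl.gz is present; the file was pickled under Python 2, hence the encoding='bytes' above):

training_data, validation_data, test_data = load_data()
train_images, train_labels = training_data
print(train_images.shape)   # expected (50000, 784)
print(train_labels.shape)   # expected (50000,)
print(len(validation_data[0]), len(test_data[0]))  # expected 10000 and 10000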
Example no. 13
    def region_filtered_flux(self, s_flux):
        """Finds the flux through the filtered region, accounts for materials"""

        if self.print_matrix_sums:
            print_sum_matrix(s_flux, 'region_filtered_flux s_flux')

        # Strong Feeling there is a big fat bug somewhere around here
        thickness=sum(self.thickness)  # in cm

        mat=self.mat
        ea=self.ea

        reg_flux=matchdim(s_flux)
        
        f=open("mu_data\\"+mat+".pkl", "rb")
        edata=pickle.load(f)
        f.close()
        elem_energy=[i[0] for i in edata]
        elem_flux=[i[1] for i in edata]
        # you knew it, absurdly slow code below. Really no way to get around it though. Filtering sucks, O(n^3)
        for i in range(0, len(s_flux)):
            si=s_flux[i]
            for j in range(0, len(s_flux[0])):
                sij=si[j]
                muexp=[-1*thickness*mu3(elem_energy, elem_flux, ea[m]) for m in range(0, len(ea))]
                for m in range(0, len(ea)):
                    reg_flux[i][j][m]=sij[m]*math.exp(muexp[m])
        
        if self.print_matrix_sums:
            print_sum_matrix(s_flux, 'region_filtered_flux reg_flux')
            
        return reg_flux
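Since the exponent in muexp does not depend on i or j, the attenuation factors can be hoisted out of the loops entirely; a sketch of a vectorized variant, under the assumption that s_flux converts cleanly to a numpy array of shape (rows, cols, len(ea)):

# attenuation factor per energy bin, computed once (Beer-Lambert style: exp(-mu * thickness))
atten = np.exp(np.array([-thickness * mu3(elem_energy, elem_flux, e) for e in ea]))
reg_flux = np.asarray(s_flux) * atten  # broadcasts over the last (energy) axis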
Example no. 14
def load_models(models_dir):
  """
  Load saved models from disk. This will attempt to unpickle all files in a
  directory; any files that give errors on unpickling (such as README.txt) will
  be skipped.

  Inputs:
  - models_dir: String giving the path to a directory containing model files.
    Each model file is a pickled dictionary with a 'model' field.

  Returns:
  A dictionary mapping model file names to models.
  """
  models = {}
  for model_file in os.listdir(models_dir):
    with open(os.path.join(models_dir, model_file), 'rb') as f:
      try:
        models[model_file] = pickle.load(f,encoding='bytes')['model']
      except pickle.UnpicklingError:
        continue
  return models

# cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
# cifar10_dir = 'C:/Work/Deep Learning/assignment1/cs231n/datasets/cifar-10-batches-py'
# X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
Example no. 15
 def load(self, path):
     """ Load model parameters from path. """
     logger.info("Loading from %s ..." % path)
     file = open(path, 'rb')
     state = pickle.load(file)
     self.__setstate__(state)
     file.close()
Example no. 16
def load_dictionary(loc='./data/book_dictionary_large.pkl'):
    """
    Load a dictionary
    """
    with open(loc, 'rb') as f:
        worddict = pkl.load(f)
    return worddict
Example no. 17
def start():
    config = DataConfig()
    histories = sorted(glob.glob(config.history_location+"*.pickle"))
    data = {}
    for hist in histories:
        file = open(hist, 'rb') 
        h = pickle.loads(pickle.load(file))
        for k, v in h.items():
            if k not in data.keys():
                data[k] = []
            for item in v:
                data[k].append(item)
    legend = []
    plt.subplot(2, 1, 1)
    for kv in data.items():
        legend.append(kv[0])
        plt.plot(kv[1])
    plt.legend(legend)
    for i, kv in enumerate(data.items()):
        plt.subplot(2, 3, i+4)
        plt.title(kv[0])
        plt.plot(kv[1])
    plt.tight_layout()
    plt.show()
        
Example no. 18
 def get_features(self):
     features = []
     clips_path = self.get_clips_path()
     for clip, path in clips_path.items():
         with open(path, 'rb') as f:
             feature = cPickle.load(f, encoding='latin1')
             features.append(self.modify_nan(feature.reshape(-1)))
     return np.array(features)
Example no. 19
 def _load_samples(self, full_filepath):
   f = gzip.open(full_filepath, 'rb')
   train_set, valid_set, test_set = cPickle.load(f)
   f.close()
   images = test_set[0]
   labels = test_set[1]
   images = (images - 0.5) * 2
   return np.float32(images), labels
Example no. 20
def load_CIFAR_batch(filename):
  """ load single batch of cifar """
  with open(filename, 'rb') as f:
    datadict = pickle.load(f)
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
    Y = np.array(Y)
    return X, Y
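A commented-out call in an earlier example refers to a load_CIFAR10 wrapper; a sketch of what such a wrapper might look like, assuming the standard CIFAR-10 layout with files data_batch_1..5 and test_batch (the wrapper itself is not part of the original snippet):

import os
import numpy as np

def load_CIFAR10(root):
  """ load all training batches plus the test batch (sketch) """
  xs, ys = [], []
  for b in range(1, 6):
    X, Y = load_CIFAR_batch(os.path.join(root, 'data_batch_%d' % b))
    xs.append(X)
    ys.append(Y)
  Xtr, Ytr = np.concatenate(xs), np.concatenate(ys)
  Xte, Yte = load_CIFAR_batch(os.path.join(root, 'test_batch'))
  return Xtr, Ytr, Xte, Yte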
Example no. 21
 def load_preprocessed(self, vocab_file, tensor_file):
   with open(vocab_file, 'rb') as f:
     self.chars = cPickle.load(f)
   self.vocab_size = len(self.chars)
   self.vocab = dict(zip(self.chars, range(len(self.chars))))
   self.tensor = np.load(tensor_file)
   train_size = int(self.tensor.shape[0] * 0.9)
   self.valid = self.tensor[train_size:]
   self.train = self.tensor[:train_size]
Example no. 22
 def play_multiple_games(self):
     outfiles = Parallel(n_jobs=self.workers)(delayed(self.play_games)() for g in range(self.workers))
     results = []
     for filename in outfiles:
         f = open(filename,'rb')
         results.extend(cPickle.load(f))
         f.close()
         os.remove(filename)
     return results
Example no. 23
def unpickle(file):
  import _pickle as cPickle
  fo = open(file, 'rb')
  dict = cPickle.load(fo,encoding='latin1')
  fo.close()
  if 'data' in dict:
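    # reshape to (N, 3, 32, 32), reorder to channels-last (N, 32, 32, 3), flatten back to (N, 3072), and scale to [0, 1)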
    dict['data'] = dict['data'].reshape((-1, 3, 32, 32)).swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32*32*3) / 256.

  return dict
Example no. 24
def mergeStagedWTL(config):
    #only run if not already merging in another process
    if os.path.exists(config.data.performance_location+"win_matrix_temp.csv"):
        return None
    else:
        win_matrix_file = open(config.data.performance_location+"win_matrix_temp.csv", "w+")
        
    merged_data = []
    files = glob.glob(config.data.performance_location+"staged_*.pickle")
    for file in files:
        try:
            data = pickle.load(open(file, "rb"))
            found = False
            for i in range(len(merged_data)):
                if merged_data[i]['player1'] == data['player1'] and merged_data[i]['player2'] == data['player2']:
                    found = True
                    merged_data[i]['wins'] += data['wins']
                    merged_data[i]['ties'] += data['ties']
                    merged_data[i]['losses'] += data['losses']
                    break
                elif merged_data[i]['player1'] == data['player2'] and merged_data[i]['player2'] == data['player1']:
                    found = True
                    merged_data[i]['wins'] += data['losses']
                    merged_data[i]['ties'] += data['ties']
                    merged_data[i]['losses'] += data['wins']
                    break
            if not found:
                merged_data.append(data)
            os.remove(file)
        except Exception as e:
            continue
    
    if os.path.exists(config.data.performance_location+"win_matrix.csv"):
        df = pd.read_csv(config.data.performance_location+"win_matrix.csv", index_col=0)    
    else:
        df = pd.DataFrame()
    
    for elem in merged_data:
        if not elem["player1"] in list(df):
            df[elem["player1"]] = 0.0
            df.loc[elem["player1"]] = 0.0
            df = df.sort_index(axis=0).sort_index(axis=1)
        if not elem["player2"] in list(df):
            df[elem["player2"]] = 0.0
            df.loc[elem["player2"]] = 0.0
            df = df.sort_index(axis=0).sort_index(axis=1)
        df.at[elem["player1"], elem["player2"]] = df.at[elem["player1"], elem["player2"]] + elem["wins"] + 0.5*elem["ties"]
        df.at[elem["player2"], elem["player1"]] = df.at[elem["player2"], elem["player1"]] + elem["losses"] + 0.5*elem["ties"]
    df.to_csv(win_matrix_file)
    win_matrix_file.close()
    if checkFileOpen(config.data.performance_location+"win_matrix.csv"):
        print("Waiting for %s to be close."% os.path.normpath(config.data.performance_location+"win_matrix.csv"))
    while checkFileOpen(config.data.performance_location+"win_matrix.csv"):
        sleep(0.1)
    shutil.move(config.data.performance_location+"win_matrix_temp.csv", config.data.performance_location+"win_matrix.csv")
    return df
Example no. 25
def load_CIFAR_batch(filename, astype='float'):
  """ load single batch of cifar """
  print(filename)
  with open(filename, 'rb') as f:
    datadict = pickle.load(f, encoding='latin1')
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype(astype)
    Y = np.array(Y)
    return X, Y
Example no. 26
 def __read_test_times_file(fd):
   try:
     with gzip.GzipFile(fileobj=fd, mode='rb') as gzf:
       times = cPickle.load(gzf)
   except Exception:
     # File doesn't exist, isn't readable, is malformed---whatever.
     # Just ignore it.
     return None
   else:
     return times
Example no. 27
File: utils.py Project: HounD/IDNNs
def get_data(name):
	"""Load data from the given name"""
	gen_data = {}
	# new version
	if os.path.isfile(name + 'data.pickle'):
		curent_f = open(name + 'data.pickle', 'rb')
		d2 = cPickle.load(curent_f)
	# Old version
	else:
		curent_f = open(name, 'rb')
		d1 = cPickle.load(curent_f)
		data1 = d1[0]
		data = np.array([data1[:, :, :, :, :, 0], data1[:, :, :, :, :, 1]])
		# Convert log e to log2
		normalization_factor = 1 / np.log2(2.718281)
		epochsInds = np.arange(0, data.shape[4])
		d2 = {}
		d2['epochsInds'] = epochsInds
		d2['information'] = data / normalization_factor
	return d2
def loadPickle():
	# open the pickle file for reading in binary mode
	pickl = open('data.pkl', 'rb')

	# load object from file
	unpickledObj = pickle.load(pickl)

	# close at the end
	pickl.close()

	return unpickledObj
Example no. 29
 def open_database(self):
     """Open the file database"""
     path = os.path.join(get_temp_directory(), "files.db")
     if not os.path.exists(path):
         self.db = {"version": settings["sharing"]["version"]}
         self.save_database()
     with open(path, "rb") as fi:
         self.db = pickle.load(fi)
     if self.db["version"] != settings["sharing"]["version"]:
         os.remove(path)
         self.open_database()
def unPickleIt(pickle_path): # might throw the file not found exception
    '''
        function to unpickle the object from the given path
        @param
        pickle_path => the path where the pickle file is located
        @return => the object extracted from the saved path
    '''

    with open(pickle_path, 'rb') as dumped_pickle:
        obj = pickle.load(dumped_pickle)

    return obj # return the unpickled object
Example no. 31
def train(dim_word=100,  # word vector dimensionality
          dim_dec=1000,
          dim_attention=512,
          dim_coverage=512,
          kernel_coverage=[5,5],
          kernel_Convenc=[3,1],
          dim_ConvBlock=[32,64,64,128],
          layersNum_block=[4,4,4,4],
          encoder='gru',
          decoder='gru_cond',
          patience=4,  # early stopping patience
          max_epochs=5000,
          finish_after=10000000,  # finish after this many updates
          dispFreq=100,
          decay_c=0.,  # L2 regularization penalty
          alpha_c=0.,  # alignment regularization
          clip_c=-1.,  # gradient clipping threshold
          lrate=1e-8,  # learning rate
          dim_target=62,  # source vocabulary size
          input_channels=123,  # target vocabulary size
          maxlen=100,  # maximum length of the description
          maxImagesize=1, # maximum size of the input image
          optimizer='rmsprop',
          batch_Imagesize=16,
          valid_batch_Imagesize=16,
          batch_size=16,
          valid_batch_size=16,
          saveto='model.npz',
          bn_saveto='bn_model.npz',
          validFreq=1000,
          saveFreq=1000,   # save the parameters after every saveFreq updates
          sampleFreq=100,   # generate some samples after every sampleFreq
          datasets=['feature.pkl',
                    'label.txt'],
          valid_datasets=['feature_valid.pkl', 
                          'label_valid.txt'],
          dictionaries=['lexicon.txt'],
          valid_output=['decode.txt'],
          valid_result=['result.txt'],
          use_dropout=False,
          reload_=False):

    # Model options
    model_options = locals().copy()

    # load dictionaries and invert them

    worddicts = load_dict(dictionaries[0])
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    # reload options
    if reload_ and os.path.exists(saveto):
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print('Loading data')

    train,train_uid_list = dataIterator(datasets[0], datasets[1],
                         worddicts,
                         batch_size=batch_size, batch_Imagesize=batch_Imagesize,maxlen=maxlen,maxImagesize=maxImagesize)
    valid,valid_uid_list = dataIterator(valid_datasets[0], valid_datasets[1],
                         worddicts,
                         batch_size=valid_batch_size, batch_Imagesize=valid_batch_Imagesize,maxlen=maxlen,maxImagesize=maxImagesize)

    print('Building model')
    params = init_params(model_options)
    bn_params = init_bn_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)
        bn_params = load_params(bn_saveto, bn_params)

    tparams = init_tparams(params)
    bn_tparams = init_tparams(bn_params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, bn_tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print('Building sampler')
    f_init, f_next = build_sampler(tparams, bn_tparams, model_options, trng, use_noise)

    # before any regularizer
    print('Building f_log_probs...')
    f_log_probs = theano.function(inps, cost, profile=profile)
    print('Done')

    cost = cost.mean()

    # apply L2 regularization on weights
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            tmp = kk.split('_')
            if tmp[-2] != 'bn':
                weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # regularize the alpha weights
    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * (
            (tensor.cast(y_mask.sum(0)//x_mask.sum(0), 'float32')[:, None] -
             opt_ret['dec_alphas'].sum(0))**2).sum(1).mean()
        cost += alpha_reg

    # after all regularizers - compile the computational graph for cost
    print('Building f_cost...')
    f_cost = theano.function(inps, cost, profile=profile)
    print('Done')

    print('Computing gradient...')
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print('Done')

    # apply gradient clipping here
    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (clip_c**2),
                                           g / tensor.sqrt(g2) * clip_c,
                                           g))
        grads = new_grads

    # compile the optimizer, the actual computational graph is compiled here
    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, bn_tparams, opt_ret, grads, inps, cost)
    print('Done')

    
    
    # print model parameters
    print("Model params:\n{0}".format(
            pprint.pformat(sorted([p for p in params]))))
    # end



    print('Optimization')

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = list(numpy.load(saveto)['history_errs'])
    best_p = None
    best_bn_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train)
    if saveFreq == -1:
        saveFreq = len(train)
    if sampleFreq == -1:
        sampleFreq = len(train)

    uidx = 0
    estop = False
    halfLrFlag = 0
    bad_counter = 0
    ud_s = 0
    ud_epoch = 0
    cost_s = 0.
    for eidx in xrange(max_epochs):
        n_samples = 0

        ud_epoch = time.time()
        random.shuffle(train) # shuffle data

        for x, y in train:
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            ud_start = time.time()

            x, x_mask, y, y_mask = prepare_data(model_options, x, y)

            if x is None:
                print('Minibatch with zero sample under length ', maxlen)
                uidx -= 1
                continue

            # compute cost, grads and copy grads to shared variables
            cost = f_grad_shared(x, x_mask, y, y_mask)
            cost_s += cost
            # do the update on parameters
            f_update(lrate)

            ud = time.time() - ud_start
            ud_s += ud
            

            # check for bad numbers, usually we remove non-finite elements
            # and continue training - but not done here
            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            # verbose
            if numpy.mod(uidx, dispFreq) == 0:
                ud_s /= 60.
                cost_s /= dispFreq
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost_s, 'UD ', ud_s, 'lrate ',lrate, 'bad_counter', bad_counter)
                ud_s = 0
                cost_s = 0.

            # save the best model so far
            if numpy.mod(uidx, saveFreq) == 0:
                print('Saving...')

                if best_p is not None:
                    params = best_p
                    bn_params = best_bn_p
                else:
                    params = unzip(tparams)
                    bn_params = unzip(bn_tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                numpy.savez(bn_saveto, history_errs=history_errs, **bn_params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print('Done')

            # generate some samples with the model and display them
            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                use_noise.set_value(0.)
                fpp_sample=open(valid_output[0],'w')
                valid_count_idx=0
                # FIXME: random selection?
                for x,y in valid:
                    for xx in x:
                        xx_pad = numpy.zeros((xx.shape[0],xx.shape[1],xx.shape[2]), dtype='float32') # input_channels * height * width
                        xx_pad[:,:, :] = xx / 255.
                        stochastic = False
                        sample, score = gen_sample(tparams, f_init, f_next,
                                                   xx_pad[None, :, :, :],
                                                   model_options, trng=trng, k=10,
                                                   maxlen=1000,
                                                   stochastic=stochastic,
                                                   argmax=False)
                        
                        if stochastic:
                            ss = sample
                        else:
                            score = score / numpy.array([len(s) for s in sample])
                            ss = sample[score.argmin()]

                        fpp_sample.write(valid_uid_list[valid_count_idx])
                        valid_count_idx=valid_count_idx+1
                        for vv in ss:
                            if vv == 0: # <eol>
                                break
                            fpp_sample.write(' '+worddicts_r[vv])
                        fpp_sample.write('\n')
                fpp_sample.close()
                print('valid set decode done')
                ud_epoch = time.time() - ud_epoch
                ud_epoch /= 60.
                print('epoch cost time ... ', ud_epoch)



            # validate model on validation set and early stop if necessary
            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err_cost = valid_errs.mean()
                

                # compute wer
                os.system('python compute-wer.py ' + valid_output[0] + ' ' + valid_datasets[1] + ' ' + valid_result[0])
                fpp=open(valid_result[0])
                stuff=fpp.readlines()
                fpp.close()
                m=re.search('WER (.*)\n',stuff[0])
                valid_per=100. * float(m.group(1))
                m=re.search('ExpRate (.*)\n',stuff[1])
                valid_sacc=100. * float(m.group(1))
                valid_err=valid_per
                #valid_err=0.7*valid_per-0.3*valid_sacc

                history_errs.append(valid_err)

                if uidx/validFreq == 0 or valid_err <= numpy.array(history_errs).min(): # first validation, or a new best model
                    best_p = unzip(tparams)
                    best_bn_p = unzip(bn_tparams)
                    bad_counter = 0

                if uidx/validFreq != 0 and valid_err > numpy.array(history_errs).min():
                    bad_counter += 1
                    if bad_counter > patience:
                        if halfLrFlag==2:
                            print('Early Stop!')
                            estop = True
                            break
                        else:
                            print('Lr decay and retrain!')
                            bad_counter = 0
                            lrate = lrate / 2
                            params = best_p
                            bn_params = best_bn_p
                            halfLrFlag += 1 

                if numpy.isnan(valid_err):
                    #ipdb.set_trace()
                    print('valid_err nan')

                print('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' % (valid_per,valid_sacc,valid_err_cost))

            # finish after this many updates
            if uidx >= finish_after:
                print('Finishing after %d iterations!' % uidx)
                estop = True
                break

        print('Seen %d samples' % n_samples)

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)
        zipp(best_bn_p, bn_tparams)

    use_noise.set_value(0.)
    valid_err = pred_probs(f_log_probs, prepare_data,
                           model_options, valid).mean()

    print('Valid ', valid_err)

    params = copy.copy(best_p)
    bn_params = copy.copy(best_bn_p)
    numpy.savez(saveto, zipped_params=best_p,
                history_errs=history_errs,
                **params)
    numpy.savez(bn_saveto, zipped_params=best_bn_p,
                history_errs=history_errs,
                **bn_params)

    return valid_err
Example no. 32
def load_pickle(filename):
    f = open(filename, "rb")
    p = cPickle.load(f)
    f.close()
    return (p)
Example no. 33
	def load_bigrams(self, name, path ='../../feature_groups/lda_pickles'):
		print("loading bigram: "+name)
		path = os.path.join(path, name)
		with open(path, "rb") as f:
			self.bigrams = _pickle.load(f)
Example no. 34
def voc_eval(
        det_lines,  # detection results
        annopath,  # annotation directory
        imagesetfile,  # file listing the image names to evaluate
        classname,  # class name used for filtering
        cachedir,  # cache directory
        ovthresh=0.5,  # IoU threshold
        use_07_metric=False  # AP computation mode
):
    '''
    @param det_lines    [list ] per-class detection results
        data: [
                [imagename1, confidence, xmin, ymin, xmax, ymax],  # (first detection in image 1)
                [imagename1, confidence, xmin, ymin, xmax, ymax],  # (second detection in image 1)
                [imagename2, confidence, xmin, ymin, xmax, ymax],  # (first detection in image 2)
              ]
    @param annopath     [str ] annotation directory
        annopath.format(imagename) should be the xml annotations file.
        annopath=>'annotations/{}.xml'=>annopath.format('2008_000001')=>'annotations/2008_000001.xml'
    @param imagesetfile [str ] image-set file for evaluation
        text file with one image file name per line, without extension
        file format:
            2008_000001
            2008_000002
    @param classname     [str  ] name of the class to evaluate, used to filter the detections and annotations
    @param cachedir      [str  ] cache directory, used to store the parsed ground-truth annotations
    @param ovthresh      [float] IoU threshold
    @param use_07_metric [bool ] AP computation mode
        Whether to use VOC07's 11 point AP computation
                (default False); VOC07 samples the curve at 11 points.
    @return rec, prec, ap
        rec ---recall, vector
        prec---precision, vector
        ap-----average precision, scalar
        computation:
            number of detections: N=5
            sort by confidence in descending order
            TP/FP computation:
                select the detections of this class and the class gt_bboxes
                TP[:], FP[:] initialised to False
                for each detection
                    if the detected bbox overlaps a gt_bbox of this class with IoU above the threshold
                    then
                        TP[i]=1
                        duplicate handling (the same gt_bbox matched by several detections): FP[i]=1
                    else
                        FP[i]=1

            TP:[1, 0, 1, 1, 0], cumulative sum => TP_int=[1,1,2,3,3]
            FP:[0, 1, 0, 0, 1], cumulative sum => FP_int=[0,1,1,1,2]
            prec:TP_int/(TP_int+FP_int)=>[1, 1/2, 2/3, 3/4, 3/5]
            rec :TP_int/N=>[1/5, 1/5, 2/5, 3/5, 3/5]
            ap:
                if use_07_metric:
                    # 11 point metric
                    ap = 0.
                    for t in np.arange(0., 1.1, 0.1):
                        if np.sum(rec >= t) == 0:
                            p = 0
                        else:
                            p = np.max(prec[rec >= t])
                        ap = ap + p / 11.
                else:
                    # correct AP calculation
                    # first append sentinel values at the end
                    mrec = np.concatenate(([0.], rec, [1.]))
                    mpre = np.concatenate(([0.], prec, [0.]))

                    # compute the precision envelope
                    for i in range(mpre.size - 1, 0, -1):
                        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

                    # to calculate area under PR curve, look for points
                    # where X axis (recall) changes value
                    i = np.where(mrec[1:] != mrec[:-1])[0]

                    # and sum (\Delta recall) * prec
                    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    '''
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # cache file for the parsed dataset => [str ] cachefile
    # first load gt (ground truth)
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')  # name of the cache file

    # read the names of all test images => [list] imagenames
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()  # read the names of all images to evaluate
    imagenames = [x.strip() for x in lines]  # image file names stored in imagenames, e.g. length 1000

    # load the raw annotations => [dict] recs {image name: parsed annotation}
    if not os.path.isfile(cachefile):  # if the cache file does not exist, reload the annotations from the raw dataset
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(
                annopath.format(imagename)
            )  # parse_rec reads the annotation file of the current image and stores it in recs (key: image name, value: gt)
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))  # progress indicator
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        print(type(recs))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)  # save the recs dict to the cache file
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)  # if the cache file already exists, load it into recs

    # extract the ground truth of class classname
    # extract gt objects for this class; recall, precision and AP are all computed per class
    class_recs = {}  # annotations of the current class
    npos = 0  # number of annotated (non-difficult) objects
    for imagename in imagenames:
        # keep only the annotations of class classname => R
        R = [obj for obj in recs[imagename]
             if obj['name'] == classname]  # filter: keep only the entries of the requested class
        # extract bbox, gt
        bbox = np.array([x['bbox'] for x in R])  # extract the bboxes
        difficult = np.array([x['difficult'] for x in R
                              ]).astype(np.bool)  # all zeros if the dataset has no 'difficult' flag

        # detection flags, default False
        det = [False] * len(R)  # len(R) is the number of gt objects of this class; det marks whether each has been detected
        # number of gt objects (objects flagged difficult are excluded)
        npos = npos + sum(
            ~difficult)  # count of non-difficult samples; equals the gt count if the dataset has no 'difficult' flag
        # annotations of the current class (difficult objects not counted in npos)
        class_recs[imagename] = {
            'bbox': bbox,  # ground-truth boxes
            'difficult': difficult,  # difficult flags
            'det': det  # detection flags
        }  # the three values have the same length

    # read dets (detection results)
    #detfile = detpath.format(classname)
    #with open(detfile, 'r') as f:
    #    lines = f.readlines()
    #splitlines = [x.strip().split(' ') for x in lines]  # with e.g. 20000 detections, splitlines has length 20000
    splitlines = det_lines
    image_ids = [x[0] for x in splitlines
                 ]  # image name of each detection; length 20000 even if there are only 1000 images, since one image can hold several detections
    confidence = np.array([float(x[1]) for x in splitlines])  # detection confidences
    BB = np.array([[float(z) for z in x[2:]]
                   for x in splitlines])  # bboxes converted to float

    # sort by confidence (all detections, descending)
    sorted_ind = np.argsort(-confidence)  # indices of confidence sorted in descending order
    sorted_scores = np.sort(-confidence)  # sorted scores, descending
    print('BB.shape:', BB.shape)
    print('sorted_ind.shape:', sorted_ind.shape)
    BB = BB[sorted_ind, :]  # reorder the bboxes from highest to lowest confidence
    image_ids = [image_ids[x] for x in sorted_ind]  # reorder image_ids accordingly

    # go down dets and mark TPs and FPs
    nd = len(image_ids)  # note: number of detections (e.g. 20000), not number of images
    tp = np.zeros(nd)  # true positives
    fp = np.zeros(nd)  # false positives
    for d in range(nd):  # iterate over all detections; already sorted, so from highest to lowest confidence
        R = class_recs[image_ids[d]]  # all same-class gt of the image this detection belongs to
        bb = BB[d, :].astype(float)  # bbox coordinates of the current detection (a single bbox)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)  # bbox coordinates of all same-class gt in that image (N of them)

        if BBGT.size > 0:
            # compute overlaps between the current detection and all gt boxes of the image (one-to-many via numpy broadcasting)
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)  # maximum overlap
            jmax = np.argmax(overlaps)  # gt box achieving the maximum overlap

        if ovmax > ovthresh:  # the best overlap with a ground-truth box exceeds the threshold
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.  # one more true positive
                    R['det'][
                        jmax] = 1  # mark this gt as detected; a later detection matching the same gt must not count as another hit
                else:  # gt already matched: count this detection as a false positive
                    fp[d] = 1.
        else:  # below the threshold: certainly a false positive
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)  # cumulative false positives up to each detection
    tp = np.cumsum(tp)  # cumulative true positives up to each detection
    rec = tp / float(npos)  # recall, one value per detection, increasing from 0 towards 1
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth; precision, one value per detection, decreasing from 1
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
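A small standalone check of the TP/FP arithmetic worked through in the docstring above (the numbers are the N=5 example from the docstring; the "correct AP" branch is reproduced unchanged):

import numpy as np

tp = np.array([1., 0., 1., 1., 0.])
fp = np.array([0., 1., 0., 0., 1.])
npos = 5
tp_cum, fp_cum = np.cumsum(tp), np.cumsum(fp)   # [1,1,2,3,3], [0,1,1,1,2]
rec = tp_cum / float(npos)                      # [0.2, 0.2, 0.4, 0.6, 0.6]
prec = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)
mrec = np.concatenate(([0.], rec, [1.]))
mpre = np.concatenate(([0.], prec, [0.]))
for i in range(mpre.size - 1, 0, -1):           # precision envelope
    mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
i = np.where(mrec[1:] != mrec[:-1])[0]
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
print(rec, prec, ap)                            # ap comes out to 0.5 for this example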
Example no. 35
npop = 100  # number of episodes
sigma = 0.1
alpha = 0.03
iter_num = 300
aver_reward = None
allow_writing = True
reload = False  # reload = True basically loads a pre-made (and pretty good) model - it's supposed to be kind of a demo
iterations = 1000

#graphing set up
areward = []

print(hl_size, version, npop, sigma, alpha, iter_num)

if reload:
    model = pickle.load(open('model-pedal%d.p' % version,
                             'rb'))  # loads pre-made model
else:  # creates new, random model
    model = {}
    #np.random.randn fills with random samples from standardized normal distribution
    model['W1'] = np.random.randn(24, hl_size) / np.sqrt(
        24)  # input-hiddenlayer ... 24 x hl_size
    model['W2'] = np.random.randn(hl_size, 4) / np.sqrt(
        hl_size)  # hiddenlayer-output


def afunction(x):
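    # softsign-style squashing: maps any real x smoothly into (-1, 1)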
    return x / (1 + np.absolute(x))


def get_action(state, model):
    #print(state)
Example no. 36
import _pickle as pickle  # cPickle
from collections import Counter  #tallies the total count of words in a list
import keras
import postprocessing as pr  #possibly from https://github.com/steerapi/seq2seq-show-att-tell/blob/master/generate_pretrained_embedding.py

# load data
with open('../data/tokens.pkl', 'rb') as fp:  # use create_picle_file.py
    #heads, desc, and keywords as separate arrays
    heads, descs, keywords = pickle.load(fp)

# headings tuple
i = 0
heads[i]
# Remainders : Super wi-fi edition

#Articles
descs[i]


#tokenize text, return vocab in order of usage ("the" should come first)
def get_vocab(combinedText):
    # TODO try to get vocab and count in another way,
    words = combinedText.split()
    vocab = [word for word, word_count in Counter(words).most_common()]
    return vocab


vocab = get_vocab(heads[i] + descs[i])

print(vocab[:50])
print('...', len(vocab))
Example no. 37
from neoStructures import *
import matplotlib.pyplot as plt
from os.path import isdir, join
import _pickle
import seaborn as sns
sns.set()
sns.set_context('paper')

import sys
print(sys.version)

#######################
# Load neoEpoch data with _pickle
data_dir = join('pySpikeAnalysis',
                'sample_data') if isdir('pySpikeAnalysis') else join(
                    '..', '..', 'pySpikeAnalysis', 'sample_data')
neo_epoch_filename = r'neoepoch_071118_1132.p'
with open(join(data_dir, neo_epoch_filename), 'rb') as f:
    neo_epoch = _pickle.load(f)

neo_epoch.save_fig = 0

##############################
# See information about NeoAll
print(neo_epoch)

##############################
# Plot the raster plot for unit 4
neo_epoch.plot_rasterplot(4)
Example no. 38
    ## BPE args
    parser.add_argument('--bpe_codes', type=str, default='data/bpe.codes')
    parser.add_argument('--bpe_vocab', type=str, default='data/vocab.txt')
    parser.add_argument('--bpe_vocab_thresh', type=int, default=50)

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu    

    # load saved models
    pp_model = torch.load(args.pp_model)
    parse_model = torch.load(args.parse_model)

    # load vocab
    pp_vocab, rev_pp_vocab = cPickle.load(open(args.vocab, 'rb'))

    tag_file = codecs.open(args.parse_vocab, 'r', 'utf-8')
    parse_gen_voc = {}
    for idx, line in enumerate(tag_file):
        line = line.strip()
        parse_gen_voc[line] = idx
    rev_label_voc = dict((v,k) for (k,v) in parse_gen_voc.items()) 

    # load paraphrase network
    pp_args = pp_model['config_args']
    net = SCPN(pp_args.d_word, pp_args.d_hid, pp_args.d_nt, pp_args.d_trans,
        len(pp_vocab), len(parse_gen_voc) - 1, pp_args.use_input_parse)
    net.cuda()
    net.load_state_dict(pp_model['state_dict'])
    net.eval()
Example no. 39
def main():
    parser = argparse.ArgumentParser(description='Mask R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Output directory')
    parser.add_argument('--iteration', '-i', type=int, default=200000)
    parser.add_argument('--weight', '-w', type=str, default='')
    parser.add_argument('--label_file',
                        '-f',
                        type=str,
                        default='data/label_coco.txt')
    parser.add_argument('--backbone', type=str, default='fpn')
    parser.add_argument('--head_arch', '-a', type=str, default='fpn')
    parser.add_argument('--multi_gpu', '-m', type=int, default=0)
    parser.add_argument('--batch_size', '-b', type=int, default=1)

    args = parser.parse_args()

    print('lr:{}'.format(args.lr))
    print('output:{}'.format(args.out))
    print('weight:{}'.format(args.weight))
    print('label file:{}'.format(args.label_file))
    print('iteration::{}'.format(args.iteration))
    print('backbone architecture:{}'.format(args.backbone))
    print('head architecture:{}'.format(args.head_arch))

    if args.multi_gpu:
        print(
            'try to use chainer.training.updaters.MultiprocessParallelUpdater')
        if not chainer.training.updaters.MultiprocessParallelUpdater.available(
        ):
            print('MultiprocessParallelUpdater is not available')
            args.multi_gpu = 0

    with open(args.label_file, "r") as f:
        labels = f.read().strip().split("\n")

    faster_rcnn = MaskRCNNResnet50(n_fg_class=len(labels),
                                   backbone=args.backbone,
                                   head_arch=args.head_arch)
    faster_rcnn.use_preset('evaluate')
    model = FPNMaskRCNNTrainChain(faster_rcnn, mask_loss_fun=calc_mask_loss)
    if exists(args.weight):
        chainer.serializers.load_npz(args.weight,
                                     model.faster_rcnn,
                                     strict=False)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    pkl_file = 'train_data.pkl'
    if isfile(pkl_file):
        print('Loading from the pkl file')
        dataload_start = time.time()
        with open(pkl_file, 'rb') as f:
            coco_train_data = pickle.load(f)
        dataload_end = time.time()
        print('Load from pkl took {}'.format(dataload_end - dataload_start))
    else:
        dataload_start = time.time()
        coco_train_data = COCOMaskLoader(category_filter=labels)
        dataload_end = time.time()
        print('Regular load took {}'.format(dataload_end - dataload_start))
        print('Saving for next time')
        with open(pkl_file, 'wb') as f:
            pickle.dump(coco_train_data, f)

    train_data = TransformDataset(coco_train_data, Transform(faster_rcnn))

    if args.multi_gpu:
        train_iters = [
            chainer.iterators.SerialIterator(train_data,
                                             1,
                                             repeat=True,
                                             shuffle=True) for i in range(8)
        ]
        updater = chainer.training.updater.MultiprocessParallelUpdater(
            train_iters, optimizer, device=range(8))

    else:
        train_iter = chainer.iterators.SerialIterator(
            train_data, batch_size=args.batch_size, repeat=True, shuffle=False)
        updater = chainer.training.updater.StandardUpdater(train_iter,
                                                           optimizer,
                                                           device=args.gpu)

    trainer = chainer.training.Trainer(updater, (args.iteration, 'iteration'),
                                       args.out)

    trainer.extend(extensions.snapshot_object(
        model.faster_rcnn, 'model_{.updater.iteration}.npz'),
                   trigger=(5000, 'iteration'))

    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(2, 'epoch'))

    log_interval = 100, 'iteration'
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/mask_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
    ]),
                   trigger=(100, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=200))
    trainer.extend(extensions.dump_graph('main/loss'))

    save_args(args, args.out)
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))

    trainer.run()
Example no. 40
    f.write('abc\n')
    f.write('def')
    f.write('feg')

l1 = [[3, 6, 9], [6, 8, 5], [9, 7, 2]]
out_file = 'out_list.txt'
with open(out_file, 'w') as f:
    f.write(str(l1))

print('------ PICKLE -------')
import _pickle
out_file = 'out_list.dat'
with open(out_file, 'wb') as f:
    _pickle.dump(l1, f)

with open(out_file, 'rb') as f:
    out_obj = _pickle.load(f)
    print(out_obj)
    print(sum(out_obj[0]))

print('------ JSON -------')
import json
out_file = 'out_list.json'
with open(out_file, 'w') as f:
    json.dump(l1, f)

with open(out_file, 'r') as f:
    out_obj = json.load(f)
    print(out_obj)
    print(sum(out_obj[0]))
Example no. 41
def read(num_steps, binaresed=True):
    ###
    root = './data/'
    data_set = pd.read_table(root + 'train.tsv', sep='\t', header=None)
    data_set = data_set.drop(columns=[0, 8, 9, 10, 11, 12])
    data_set = data_set.rename(
        columns={
            1: "label",
            2: "statement",
            3: "subject",
            4: "speaker",
            5: "job",
            6: "state",
            7: "party",
            13: "venue"
        })

    embeddings_index = {}
    with open(root + 'glove.6B.100d.txt', encoding="utf8") as fp:
        for line in fp:
            values = line.split()
            vectors = np.asarray(values[1:], dtype='float32')
            embeddings_index[values[0].lower()] = vectors

    val_set = pd.read_table(root + 'valid.tsv', sep='\t', header=None)
    val_set = val_set.drop(columns=[0, 8, 9, 10, 11, 12])
    val_set = val_set.rename(
        columns={
            1: "label",
            2: "statement",
            3: "subject",
            4: "speaker",
            5: "job",
            6: "state",
            7: "party",
            13: "venue"
        })
    ###
    test_set = pd.read_table(root + 'test.tsv', sep='\t', header=None)
    test_set = test_set.drop(columns=[0, 8, 9, 10, 11, 12])
    test_set = test_set.rename(
        columns={
            1: "label",
            2: "statement",
            3: "subject",
            4: "speaker",
            5: "job",
            6: "state",
            7: "party",
            13: "venue"
        })
    ###
    if binaresed:
        dim_class = 2
        label_dict = {
            'pants-fire': 0,
            'false': 0,
            'barely-true': 0,
            'half-true': 1,
            'mostly-true': 1,
            'true': 1
        }
        label_reverse_arr = [
            'pants-fire', 'false', 'barely-true', 'half-true', 'mostly-true',
            'true'
        ]
    else:
        dim_class = 6
        label_dict = {
            'pants-fire': 0,
            'false': 1,
            'barely-true': 2,
            'half-true': 3,
            'mostly-true': 4,
            'true': 5
        }
        label_reverse_arr = [
            'pants-fire', 'false', 'barely-true', 'half-true', 'mostly-true',
            'true'
        ]
    ### __Transform the label into real scalars__
    def create_one_hot(x):
        return keras.utils.to_categorical(label_dict[x], num_classes=6)

    data_set['label_id'] = data_set['label'].apply(lambda x: label_dict[x])
    val_set['label_id'] = val_set['label'].apply(lambda x: label_dict[x])
    test_set['label_id'] = test_set['label'].apply(lambda x: label_dict[x])
    ### __Transform speakers as real scalars__
    speakers = [
        'barack-obama', 'donald-trump', 'hillary-clinton', 'mitt-romney',
        'scott-walker', 'john-mccain', 'rick-perry', 'chain-email',
        'marco-rubio', 'rick-scott', 'ted-cruz', 'bernie-s', 'chris-christie',
        'facebook-posts', 'charlie-crist', 'newt-gingrich', 'jeb-bush',
        'joe-biden', 'blog-posting', 'paul-ryan'
    ]
    speaker_dict = {}
    for cnt, speaker in enumerate(speakers):
        speaker_dict[speaker] = cnt

    def map_speaker(speaker):
        if isinstance(speaker, str):
            speaker = speaker.lower()
            matches = [s for s in speakers if s in speaker]
            if len(matches) > 0:
                return speaker_dict[matches[0]]  #Return index of first match
            else:
                return len(speakers)
        else:
            return len(speakers)  #Nans or un-string data goes here.

    data_set['speaker_id'] = data_set['speaker'].apply(map_speaker)
    val_set['speaker_id'] = val_set['speaker'].apply(map_speaker)
    ### __Transform job as real scalar__
    data_set['job'].value_counts()[:10]
    job_list = [
        'president', 'u.s. senator', 'governor', 'president-elect',
        'presidential candidate', 'u.s. representative', 'state senator',
        'attorney', 'state representative', 'congress'
    ]

    job_dict = {
        'president': 0,
        'u.s. senator': 1,
        'governor': 2,
        'president-elect': 3,
        'presidential candidate': 4,
        'u.s. representative': 5,
        'state senator': 6,
        'attorney': 7,
        'state representative': 8,
        'congress': 9
    }

    def map_job(job):
        if isinstance(job, str):
            job = job.lower()
            matches = [s for s in job_list if s in job]
            if len(matches) > 0:
                return job_dict[matches[0]]  #Return index of first match
            else:
                return 10  #This maps any other job to index 10
        else:
            return 10  #Nans or un-string data goes here.

    data_set['job_id'] = data_set['job'].apply(map_job)
    val_set['job_id'] = val_set['job'].apply(map_job)
    ### __Transform party as real scalar__
    data_set['party'].value_counts()
    party_dict = {
        'republican': 0,
        'democrat': 1,
        'none': 2,
        'organization': 3,
        'newsmaker': 4
    }

    #default index for rest party is 5
    def map_party(party):
        if party in party_dict:
            return party_dict[party]
        else:
            return 5

    data_set['party_id'] = data_set['party'].apply(map_party)
    val_set['party_id'] = val_set['party'].apply(map_party)
    ### __Transform states as real scalar__
    #print data_set['state'].value_counts()[0:50]
    #Possible groupings (50 groups + 1 for rest)
    states = [
        'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
        'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
        'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
        'Maine'
        'Maryland',  # NOTE: the missing comma joins these into one string ('MaineMaryland'); states_dict below mirrors this as 'mainemaryland'
        'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
        'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
        'New Jersey', 'New Mexico', 'New York', 'North Carolina',
        'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
        'Rhode Island', 'South  Carolina', 'South Dakota', 'Tennessee',
        'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia',
        'Wisconsin', 'Wyoming'
    ]
    #states_dict = {}
    #i = 0
    #for state in states:
    #    state_key = state.lower()
    #    states_dict[state_key] = i
    #    i += 1
    #print len(states_dict.keys())

    states_dict = {
        'wyoming': 48,
        'colorado': 5,
        'washington': 45,
        'hawaii': 10,
        'tennessee': 40,
        'wisconsin': 47,
        'nevada': 26,
        'north dakota': 32,
        'mississippi': 22,
        'south dakota': 39,
        'new jersey': 28,
        'oklahoma': 34,
        'delaware': 7,
        'minnesota': 21,
        'north carolina': 31,
        'illinois': 12,
        'new york': 30,
        'arkansas': 3,
        'west virginia': 46,
        'indiana': 13,
        'louisiana': 17,
        'idaho': 11,
        'south  carolina': 38,
        'arizona': 2,
        'iowa': 14,
        'mainemaryland': 18,
        'michigan': 20,
        'kansas': 15,
        'utah': 42,
        'virginia': 44,
        'oregon': 35,
        'connecticut': 6,
        'montana': 24,
        'california': 4,
        'massachusetts': 19,
        'rhode island': 37,
        'vermont': 43,
        'georgia': 9,
        'pennsylvania': 36,
        'florida': 8,
        'alaska': 1,
        'kentucky': 16,
        'nebraska': 25,
        'new hampshire': 27,
        'texas': 41,
        'missouri': 23,
        'ohio': 33,
        'alabama': 0,
        'new mexico': 29
    }

    def map_state(state):
        if isinstance(state, str):
            state = state.lower()
            if state in states_dict:
                return states_dict[state]
            else:
                if 'washington' in state:
                    return states_dict['washington']
                else:
                    return 50  #This maps any other location to index 50
        else:
            return 50  #Nans or un-string data goes here.

    data_set['state_id'] = data_set['state'].apply(map_state)
    val_set['state_id'] = val_set['state'].apply(map_state)
    ### __Transform subject as real scalar__
    data_set['subject'].value_counts()[0:5]
    #Possible groups (14)
    subject_list = [
        'health', 'tax', 'immigration', 'election', 'education',
        'candidates-biography', 'economy', 'gun', 'jobs', 'federal-budget',
        'energy', 'abortion', 'foreign-policy'
    ]

    subject_dict = {
        'health': 0,
        'tax': 1,
        'immigration': 2,
        'election': 3,
        'education': 4,
        'candidates-biography': 5,
        'economy': 6,
        'gun': 7,
        'jobs': 8,
        'federal-budget': 9,
        'energy': 10,
        'abortion': 11,
        'foreign-policy': 12
    }

    #health-care,taxes,immigration,elections,education,candidates-biography,guns,
    #economy&jobs ,federal-budget,energy,abortion,foreign-policy,state-budget, rest
    #Economy & Jobs is bundled together, because it occurs together
    def map_subject(subject):
        if isinstance(subject, str):
            subject = subject.lower()
            matches = [s for s in subject_list if s in subject]
            if len(matches) > 0:
                return subject_dict[matches[0]]  #Return index of first match
            else:
                return 13  #This maps any other subject to index 13
        else:
            return 13  #Nans or un-string data goes here.

    data_set['subject_id'] = data_set['subject'].apply(map_subject)
    val_set['subject_id'] = val_set['subject'].apply(map_subject)
    ### __Transform venue as real scalar__
    data_set['venue'].value_counts()[0:15]

    venue_list = [
        'news release', 'interview', 'tv', 'radio', 'campaign',
        'news conference', 'press conference', 'press release', 'tweet',
        'facebook', 'email'
    ]
    venue_dict = {
        'news release': 0,
        'interview': 1,
        'tv': 2,
        'radio': 3,
        'campaign': 4,
        'news conference': 5,
        'press conference': 6,
        'press release': 7,
        'tweet': 8,
        'facebook': 9,
        'email': 10
    }

    def map_venue(venue):
        if isinstance(venue, str):
            venue = venue.lower()
            matches = [s for s in venue_list if s in venue]
            if len(matches) > 0:
                return venue_dict[matches[0]]  #Return index of first match
            else:
                return 11  #This maps any other venue to index 11
        else:
            return 11  #Nans or un-string data goes here.

    #possible groups (12)
    #news release, interview, tv (television), radio, campaign, news conference, press conference, press release,
    #tweet, facebook, email, rest
    data_set['venue_id'] = data_set['venue'].apply(map_venue)
    val_set['venue_id'] = val_set['venue'].apply(map_venue)
    ### #Tokenize statement and vocab test
    vocab_dict = {}
    from keras.preprocessing.text import Tokenizer
    if not os.path.exists('vocab.p'):
        t = Tokenizer()
        t.fit_on_texts(data_set['statement'])
        vocab_dict = t.word_index
        cpickle.dump(t.word_index, open("vocab.p", "wb"))
        print('Vocab dict is created')
        print('Saved vocab dict to pickle file')
    else:
        print('Loading vocab dict from pickle file')
        vocab_dict = cpickle.load(open("vocab.p", "rb"))
    ## #Get all preprocessing done for test data
    test_set['job_id'] = test_set['job'].apply(map_job)  #Job
    test_set['party_id'] = test_set['party'].apply(map_party)  #Party
    test_set['state_id'] = test_set['state'].apply(map_state)  #State
    test_set['subject_id'] = test_set['subject'].apply(map_subject)  #Subject
    test_set['venue_id'] = test_set['venue'].apply(map_venue)  #Venue
    test_set['speaker_id'] = test_set['speaker'].apply(map_speaker)  #Speaker

    #To access a particular word index, just load these.
    #To read the words in a sentence, use the keras tokenizer again since it is convenient
    from keras.preprocessing.text import text_to_word_sequence
    from keras.preprocessing import sequence

    #text = text_to_word_sequence(data_set['statement'][0])
    #print text
    #val = [vocab_dict[t] for t in text]
    #print val

    def pre_process_statement(statement):
        text = text_to_word_sequence(statement)
        # words not in vocab_dict are simply dropped
        val = [vocab_dict[t] for t in text if t in vocab_dict]
        return val

    #Creating an embedding matrix so embeddings can be fed in directly
    num_words = len(vocab_dict) + 1
    embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
    for word, i in vocab_dict.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
    #Free embeddings_index since it takes a lot of memory
    embeddings_index = None

    ####
    #Hyper parameter definitions
    vocab_length = len(vocab_dict.keys())

    data_set['word_ids'] = data_set['statement'].apply(pre_process_statement)
    val_set['word_ids'] = val_set['statement'].apply(pre_process_statement)
    test_set['word_ids'] = test_set['statement'].apply(pre_process_statement)
    X_train = data_set['word_ids']
    Y_train = data_set['label_id']
    X_val = val_set['word_ids']
    Y_val = val_set['label_id']
    X_test = test_set['word_ids']
    Y_test = test_set['label_id']
    X_train = sequence.pad_sequences(X_train,
                                     maxlen=num_steps,
                                     padding='post',
                                     truncating='post')
    Y_train = keras.utils.to_categorical(Y_train, num_classes=dim_class)
    X_val = sequence.pad_sequences(X_val,
                                   maxlen=num_steps,
                                   padding='post',
                                   truncating='post')
    Y_val = keras.utils.to_categorical(Y_val, num_classes=dim_class)
    X_test = sequence.pad_sequences(X_test,
                                    maxlen=num_steps,
                                    padding='post',
                                    truncating='post')
    Y_test = keras.utils.to_categorical(Y_test, num_classes=dim_class)
    ###
    #Meta data preparation
    a = keras.utils.to_categorical(data_set['party_id'], num_classes=num_party)
    b = keras.utils.to_categorical(data_set['state_id'], num_classes=num_state)
    c = keras.utils.to_categorical(data_set['venue_id'], num_classes=num_venue)
    d = keras.utils.to_categorical(data_set['job_id'], num_classes=num_job)
    e = keras.utils.to_categorical(data_set['subject_id'], num_classes=num_sub)
    f = keras.utils.to_categorical(data_set['speaker_id'],
                                   num_classes=num_speaker)
    X_train_meta = np.hstack((a, b, c, d, e, f))  #concat a and b
    a_val = keras.utils.to_categorical(val_set['party_id'],
                                       num_classes=num_party)
    b_val = keras.utils.to_categorical(val_set['state_id'],
                                       num_classes=num_state)
    c_val = keras.utils.to_categorical(val_set['venue_id'],
                                       num_classes=num_venue)
    d_val = keras.utils.to_categorical(val_set['job_id'], num_classes=num_job)
    e_val = keras.utils.to_categorical(val_set['subject_id'],
                                       num_classes=num_sub)
    f_val = keras.utils.to_categorical(val_set['speaker_id'],
                                       num_classes=num_speaker)
    X_val_meta = np.hstack(
        (a_val, b_val, c_val, d_val, e_val, f_val))  #concat a_val and b_val
    a_test = keras.utils.to_categorical(test_set['party_id'],
                                        num_classes=num_party)
    b_test = keras.utils.to_categorical(test_set['state_id'],
                                        num_classes=num_state)
    c_test = keras.utils.to_categorical(test_set['venue_id'],
                                        num_classes=num_venue)
    d_test = keras.utils.to_categorical(test_set['job_id'],
                                        num_classes=num_job)
    e_test = keras.utils.to_categorical(test_set['subject_id'],
                                        num_classes=num_sub)
    f_test = keras.utils.to_categorical(test_set['speaker_id'],
                                        num_classes=num_speaker)
    X_test_meta = np.hstack((a_test, b_test, c_test, d_test, e_test,
                             f_test))  #concat all test data
    return (X_train_meta, X_val_meta,
            X_test_meta), (X_train, Y_train), (X_val, Y_val), (
                X_test, Y_test
            ), vocab_length, EMBEDDING_DIM, embedding_matrix, label_reverse_arr
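A hedged usage sketch for the read() function above: the returned word sequences, metadata one-hots and GloVe embedding matrix can be wired into a small Keras model. The architecture below (frozen Embedding + LSTM concatenated with the metadata vector) is only an illustration of how the return values fit together, not the original author's model.

from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, concatenate

num_steps = 25
(meta_tr, meta_val, meta_te), (X_tr, Y_tr), (X_val, Y_val), (X_te, Y_te), \
    vocab_len, emb_dim, emb_matrix, label_names = read(num_steps)

words_in = Input(shape=(num_steps,))
meta_in = Input(shape=(meta_tr.shape[1],))
x = Embedding(vocab_len + 1, emb_dim, weights=[emb_matrix],
              input_length=num_steps, trainable=False)(words_in)
x = LSTM(64)(x)
x = concatenate([x, meta_in])
out = Dense(Y_tr.shape[1], activation='softmax')(x)

model = Model(inputs=[words_in, meta_in], outputs=out)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit([X_tr, meta_tr], Y_tr,
          validation_data=([X_val, meta_val], Y_val), epochs=1)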
Esempio n. 42
0
# -*- coding: utf-8 -*-
"""
Created on Mon May  6 16:27:02 2019

@author: Turing
"""
from _pickle import load
from nltk.stem.snowball import SpanishStemmer

diccionario_polaridad = {}
inputt = open('diccionario_polaridades.pk1', 'rb')
diccionario_polaridad = load(inputt)
inputt.close()

resenhas_categoria = {}
inputt = open('resenhas_por_categoria_dict.pk1', 'rb')
resenhas_categoria = load(inputt)
inputt.close()

categoria_polaridad = []

palabras_no_encontradas = 0
total_palabras_encontradas = 0
ss = SpanishStemmer()

for categoria in resenhas_categoria:
    valor_categoria = 0
    palabras_encontradas = 0
    for resenha in resenhas_categoria[categoria]:
        for word in resenha[0].split():
            if diccionario_polaridad.get(ss.stem(word).lower()):
Esempio n. 43
0
        'SrTEMP_FLUX_ADJ','SrTEMP_FLUX_FB',\
        'SrWV_FLUX_ADJ','SrWV_FLUX_FB',\
        'TrWV_FLUX_ADJ','TrWV_FLUX_FB',\
        'TrALB_FLUX_ADJ','TrALB_FLUX_FB']

Kernels = ['CAM3', 'CAM5', 'ECHAM6_ctr', 'ERA', 'GFDL', 'HadGEM2']

print('read in data')

start_time = time.time()
Variables = dict()
for v in range(len(Vari)):
    Variables[Vari[v]] = dict()
    for k in range(len(Kernels)):
        if Vari[v] == 'DIR_FLUX' or Vari[v] == 'DIR_FLUXCS':
            Variables[Vari[v]][Kernels[k]] = pk.load(open(\
            Source+models+'_'+Vari[v]+'_Grid.pi','rb'))[Kernels[k]]
        else:
            Variables[Vari[v]][Kernels[k]] = np.expand_dims(pk.load(open(\
            Source+models+'_'+Vari[v]+'_Grid.pi','rb'))[Kernels[k]],axis=0)

end_time = time.time() - start_time
print('time to read in =', end_time / 60, 'minutes')

start_time = time.time()
print('area averaging')
Variables_AA = dict()
Variables_Flatten = dict()
for v in range(len(Vari)):
    Variables_AA[Vari[v]] = dict()
    Variables_Flatten[Vari[v]] = dict()
    print('on variable', Vari[v])
Esempio n. 44
0
model_bin.compile(loss='binary_crossentropy', optimizer=nadam_opt)
model_bin.fit(training_LSTM_8_pad, y_LSTM_8, epochs=4)
#early_stopping = EarlyStopping(patience=0,mode="min",monitor='val_loss')
final = time.time()

model_bin.save("./binaryKeras.model")
model_bin = tensorflow.keras.models.load_model("./binaryKeras.model")

explainer_bin = shap.DeepExplainer(model_bin, training_LSTM_8_pad)

training_LSTM_8_pad_B = explainer_bin.shap_values(training_LSTM_8_pad)

pickle.dump(training_LSTM_8_pad_B, open("./fitxerShapleyLSTM_8_B", "wb"))

training_LSTM_8_pad_B = pickle.load(open("./fitxerShapleyLSTM_8_B", "rb"))

nameVariables = list(proyectos.columns)
nameVariables[0] = "UN LDCs"
nameVariables[1] = "GDP per capita"
nameVariables[2] = "Public Grant"
nameVariables[3] = "Budget Previous Year"
nameVariables[4] = "Donor Aid Budget"
nameVariables[5] = "Latin America Mission"
nameVariables[6] = "Africa Mission"

shap_Specific = []
shap_SpecificValues = []
shap_Specific_P = []
shap_SpecificValues_P = []
len(training_LSTM_8_pad_B[0][0])
Esempio n. 45
0
from utilities import label_img_to_color

from model import ENet_model
from config import output_dir, run_dir, demo_dir

# environ['CUDA_VISIBLE_DEVICES'] = "2"

@cli_parse
class G:
    model_id = "demo_sequence"
    data_path = "../../datasets/miniscapes-processed/demoVideo/stuttgart_00"
    results_dir = "../../runs/image-segmentation/stuttgart_02"


# load the mean color channels of the train imgs:
train_mean_channels = cPickle.load(open(path.join(output_dir, "mean_channels.pkl"), "rb"))

# load the sequence data:
seq_frame_paths = []
frame_names = sorted(listdir(G.data_path))
for step, frame_name in enumerate(tqdm(frame_names)):
    frame_path = path.join(G.data_path, frame_name)
    seq_frame_paths.append(frame_path)

# validate_files(seq_frame_paths)
# exit()

# define where to place the resulting images:
try:
    makedirs(G.results_dir)
except FileExistsError as e:
Esempio n. 46
0
    def load(self):
        if self.cv is None:
            cl_train, cl_val = None, None
        else:
            cl_train, cl_val = self.filter_cl()

        with open(self.pkl_path, 'rb') as pkl_file:
            # read response
            res = pkl.load(pkl_file)
            res = res.loc[res['SOURCE'] == self.source]
            if cl_train is not None:
                res_train = res[res['ccl_name'].isin(cl_train)]
                res_val = res[res['ccl_name'].isin(cl_val)]
            else:
                res_train = res
                res_val = None
            # load cl properties and filter by geneGE
            genomics = pkl.load(pkl_file)
            cols = [
                x if x.startswith('geneGE_') else None
                for x in genomics.columns.tolist()
            ]
            cols = list(filter(lambda x: x is not None, cols))
            genomics = genomics[cols]
            # load drug descriptors
            drug = pkl.load(pkl_file)

        df_y_train = res_train.reset_index(drop=True)
        df_x_train_cl = df_y_train.merge(genomics,
                                         left_on='ccl_name',
                                         how='left',
                                         right_index=True)
        df_x_train_dr = df_y_train.merge(drug,
                                         left_on='ctrpDrugID',
                                         how='left',
                                         right_index=True)
        df_x_train_cl.drop(columns=[
            'SOURCE', 'ccl_name', 'ctrpDrugID', 'area_under_curve', 'groupID'
        ],
                           inplace=True)
        df_x_train_dr.drop(columns=[
            'SOURCE', 'ccl_name', 'ctrpDrugID', 'area_under_curve', 'groupID'
        ],
                           inplace=True)

        # NOTE: assumes self.cv is set; if cl_val is None above, res_val is None and this line would fail
        df_y_val = res_val.reset_index(drop=True)
        df_x_val_cl = df_y_val.merge(genomics,
                                     left_on='ccl_name',
                                     how='left',
                                     right_index=True)
        df_x_val_dr = df_y_val.merge(drug,
                                     left_on='ctrpDrugID',
                                     how='left',
                                     right_index=True)
        df_x_val_cl.drop(columns=[
            'SOURCE', 'ccl_name', 'ctrpDrugID', 'area_under_curve', 'groupID'
        ],
                         inplace=True)
        df_x_val_dr.drop(columns=[
            'SOURCE', 'ccl_name', 'ctrpDrugID', 'area_under_curve', 'groupID'
        ],
                         inplace=True)

        return (df_y_train, df_x_train_cl,
                df_x_train_dr), (df_y_val, df_x_val_cl, df_x_val_dr)
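The load() method above relies on several objects having been pickled into one file, which must be read back with pkl.load in the same order they were dumped. A minimal sketch of that pattern, with a made-up file name and toy DataFrames:

import pickle
import pandas as pd

res = pd.DataFrame({'SOURCE': ['CTRP'], 'ccl_name': ['A'], 'area_under_curve': [0.5]})
genomics = pd.DataFrame({'geneGE_TP53': [1.0]}, index=['A'])
drug = pd.DataFrame({'descriptor_1': [300.0]}, index=['D1'])

with open('combined.pkl', 'wb') as f:   # dump order: response, genomics, drug
    pickle.dump(res, f)
    pickle.dump(genomics, f)
    pickle.dump(drug, f)

with open('combined.pkl', 'rb') as f:   # load order must match the dump order
    res_back = pickle.load(f)
    genomics_back = pickle.load(f)
    drug_back = pickle.load(f)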
Esempio n. 47
0
	def load_model_pickle(self, name, path ='../../feature_groups/lda_pickles'):
		print("loading model: "+name)
		path = os.path.join(path, name)
		with open(path, "rb") as f:
			self.model = _pickle.load(f)
Esempio n. 48
0
import _pickle
import sklearn
import numpy

import sys

try:
    project = sys.argv[1]
    input_path = sys.argv[2]
    model_path = 'trained_model/' + sys.argv[1] + '.pkl'
except:
    print('No argument, default model: mesos')
    project = 'mesos'
    input_path = 'input/mesos.pkl'
    model_path = 'trained_model/' + 'mesos' + '_porru.pkl'

# _pickle.load needs an open file object, not a path string
with open(input_path, 'rb') as f:
    input_data = numpy.array(_pickle.load(f))
with open(model_path, 'rb') as f:
    clf = _pickle.load(f)
predict = clf.predict(input_data)

print(predict)
Esempio n. 49
0
def Load_Data(data_name):

    if data_name == 'IMDB':
        data = cPickle.load(open('./imdb.pkl', 'rb'), encoding='iso-8859-1')
    if data_name == 'ELEC':
        data = cPickle.load(open('./ELEC_30k_cwc.pkl', 'rb'),
                            encoding='iso-8859-1')
    if data_name == 'IMDB_10':
        new_data = cPickle.load(
            open('/home/s/CNN-BiLSTM2/hedwig/BCPGDS_decoder/imdb_data.pkl',
                 'rb'))
    if data_name == 'Reuters':
        new_data = cPickle.load(open('./Reuters_data.pkl', 'rb'))
        new_data_bow = cPickle.load(open('./Reuters_data_bow.pkl', 'rb'))

    if data_name == 'IMDB_10' or data_name == 'Reuters':
        #doc_labels = data['labels']
        #word_freq = data['word_freq']
        word2index = new_data.stoi
        #word2index = {key: idx for key, idx in word2index1.items() if idx < 3000}
        index2word = new_data.itos
        #train_doc_word = data['train_doc_word']
        train_doc_split = new_data.data['train']
        if data_name == 'IMDB_10':
            train_doc_label = np.array(new_data.label['train'])
        else:
            train_doc_label = np.array(new_data_bow.label['train'])
        #test_doc_word = data['test_doc_word']
        test_doc_split = new_data.data['test']
        #test_doc_label = np.array(new_data_bow.label['test'])
        if data_name == 'IMDB_10':
            test_doc_label = np.array(new_data.label['test'])
        else:
            test_doc_label = np.array(new_data_bow.label['test'])
        seq_max_len = 0
        # seq_min_len = 999
        train_doc_len = []
        for i in range(len(train_doc_split)):
            train_doc_len.append(len(train_doc_split[i]))
        test_doc_len = []
        for i in range(len(test_doc_split)):
            test_doc_len.append(len(test_doc_split[i]))

        Data_save = {}
        Data_save['word2index'] = word2index
        Data_save['index2word'] = index2word
        Data_save['train_doc_split'] = train_doc_split
        Data_save['train_doc_label'] = train_doc_label
        Data_save['test_doc_split'] = test_doc_split
        Data_save['test_doc_label'] = test_doc_label
        cPickle.dump(Data_save, open('./Reuters_new.pkl', 'wb'))

    else:

        if data_name == 'ELEC':
            doc_labels = data['Label']
        else:
            doc_labels = data['labels']
        word_freq = data['word_freq']
        word2index = data['word2index']
        index2word = data['index2word']
        train_doc_word = data['train_doc_word']
        train_doc_index = data['train_doc_index']
        train_doc_label = np.array(data['train_doc_label'])
        test_doc_word = data['test_doc_word']
        test_doc_index = data['test_doc_index']
        test_doc_label = np.array(data['test_doc_label'])

        #==================================================
        #preprocess

        num_words = len(index2word)
        index2word[num_words] = '<pad_zero>'
        word2index['<pad_zero>'] = num_words
        num_words = num_words + 1

        # num_words = len(index2word)
        # index2word[1] = '<pad_zero>'
        # word2index['<pad_zero>'] = 1
        # #num_words = num_words + 1

        seq_max_len = 0
        # seq_min_len = 999
        train_doc_split = []
        train_doc_split_len = []
        train_doc_len = []
        split_index = [
            word2index['.'], word2index['!'], word2index['?'],
            word2index['..'], word2index[';']
        ]

        for i in range(len(train_doc_index)):

            [seqs_len, seqs] = Seq_Split(train_doc_index[i], split_index,
                                         word2index['<pad_zero>'])
            train_doc_split.append(seqs)
            train_doc_split_len.append(seqs_len)

            # tmp_min = Seq_Min_Len(seqs)
            # if tmp_min < seq_min_len:
            #     seq_min_len = tmp_min
            tmp_max = Seq_Max_Len(seqs)
            if tmp_max > seq_max_len:
                seq_max_len = tmp_max

            train_doc_len.append(len(seqs))

        test_doc_split = []
        test_doc_split_len = []
        test_doc_len = []

        for i in range(len(test_doc_index)):

            [seqs_len, seqs] = Seq_Split(test_doc_index[i], split_index,
                                         word2index['<pad_zero>'])
            test_doc_split.append(seqs)
            test_doc_split_len.append(seqs_len)

            # tmp_min = Seq_Min_Len(seqs)
            # if tmp_min < seq_min_len:
            #     seq_min_len = tmp_min
            tmp_max = Seq_Max_Len(seqs)
            if tmp_max > seq_max_len:
                seq_max_len = tmp_max

            test_doc_len.append(len(seqs))

    doc_max_len = max(Seq_Max_Len(train_doc_split),
                      Seq_Max_Len(test_doc_split))
    doc_min_len = min(Seq_Min_Len(train_doc_split),
                      Seq_Min_Len(test_doc_split))
    doc_max_len_word = max(Word_Max_len(train_doc_split),
                           Word_Max_len(test_doc_split))
    doc_ave_len = (Seq_Ave_Len(train_doc_split) +
                   Seq_Ave_Len(test_doc_split)) / 2

    return word2index, train_doc_split, train_doc_label, train_doc_len, test_doc_split, test_doc_label, test_doc_len
Esempio n. 50
0
def simple_search_dag(
    criteria,
    db=None,
    nbblocks=[64],
    min_seg_len=15,
    parallel=False,
    verbosity=0,
    timing=0,
    modbbs=None,
    make_edges=True,
    merge_bblock=None,
    merge_segment=None,
    precache_splices=False,
    precache_only=False,
    bbs=None,
    bblock_ranges=[],
    only_seg=None,
    source=None,
    print_edge_summary=False,
    no_duplicate_bases=False,
    shuffle_bblocks=False,
    use_saved_bblocks=False,
    output_prefix="./worms",
    only_ivertex=[],
    **kw,
):
    bbdb, spdb = db
    queries, directions = zip(*criteria.bbspec)
    tdb = time()
    if bbs is None:
        bbs = list()
        savename = output_prefix + "_bblocks.pickle"

        if use_saved_bblocks and os.path.exists(savename):
            with open(savename, "rb") as inp:
                bbnames_list = _pickle.load(inp)
            # for i, l in enumerate(bbnames_list)
            # if len(l) >= nbblocks[i]:
            # assert 0, f"too many bblocks in {savename}"
            for i, bbnames in enumerate(bbnames_list):
                bbs.append([bbdb.bblock(n) for n in bbnames[:nbblocks[i]]])

        else:
            for iquery, query in enumerate(queries):
                if hasattr(criteria, "cloned_segments"):
                    msegs = [
                        i + len(queries) if i < 0 else i
                        for i in criteria.cloned_segments()
                    ]
                    if iquery in msegs[1:]:
                        print("seg", iquery, "repeating bblocks from",
                              msegs[0])
                        bbs.append(bbs[msegs[0]])
                        continue
                bbs0 = bbdb.query(
                    query,
                    max_bblocks=nbblocks[iquery],
                    shuffle_bblocks=shuffle_bblocks,
                    parallel=parallel,
                )
                bbs.append(bbs0)

            if bblock_ranges:
                bbs_sliced = list()
                assert len(bblock_ranges) == 2 * len(bbs)
                for ibb, bb in enumerate(bbs):
                    lb, ub = bblock_ranges[2 * ibb:2 * ibb + 2]
                    bbs_sliced.append(bb[lb:ub])
                bbs = bbs_sliced

            for ibb, bb in enumerate(bbs):
                print("bblocks", ibb)
                for b in bb:
                    print("   ", bytes(b.file).decode("utf-8"))

        bases = [
            Counter(bytes(b.base).decode("utf-8") for b in bbs0)
            for bbs0 in bbs
        ]
        assert len(bbs) == len(queries)
        for i, v in enumerate(bbs):
            assert len(v) > 0, 'no bblocks for query: "' + queries[i] + '"'
        print("bblock queries:", str(queries))
        print("bblock numbers:", [len(b) for b in bbs])
        print("bblocks id:", [id(b) for b in bbs])
        print("bblock0 id ", [id(b[0]) for b in bbs])
        print("base_counts:")
        for query, basecount in zip(queries, bases):
            counts = " ".join(f"{k}: {c}" for k, c in basecount.items())
            print(f"   {query:10}", counts)

        if criteria.is_cyclic:
            # for a, b in zip(bbs[criteria.from_seg], bbs[criteria.to_seg]):
            # assert a is b
            bbs[criteria.to_seg] = bbs[criteria.from_seg]

        if use_saved_bblocks and not os.path.exists(savename):
            bbnames = [[bytes(b.file).decode("utf-8") for b in bb]
                       for bb in bbs]
            with open(savename, "wb") as out:
                _pickle.dump(bbnames, out)

    else:
        bbs = bbs.copy()

    assert len(bbs) == len(criteria.bbspec)
    if modbbs:
        modbbs(bbs)

    if merge_bblock is not None and merge_bblock >= 0:
        # print('cloned_segments', criteria.bbspec, criteria.cloned_segments())
        if hasattr(criteria, "cloned_segments") and merge_segment is None:
            for i in criteria.cloned_segments():
                # print('   ', 'merge seg', i, 'merge_bblock', merge_bblock)
                bbs[i] = (bbs[i][merge_bblock], )
        else:
            if merge_segment is None:
                merge_segment = 0
            # print('   ', 'merge_segment not None')
            # print('   ', [len(b) for b in bbs])
            # print('   ', 'merge_segment', merge_segment)
            # print('   ', 'merge_bblock', merge_bblock, len(bbs[merge_segment]))
            bbs[merge_segment] = (bbs[merge_segment][merge_bblock], )

    tdb = time() - tdb
    # info(
    # f'bblock creation time {tdb:7.3f} num bbs: ' +
    # str([len(x) for x in bbs])
    # )

    if precache_splices:
        bbnames = [[bytes(bb.file) for bb in bbtup] for bbtup in bbs]
        bbpairs = set()
        # for bb1, bb2, dirn1 in zip(bbnames, bbnames[1:], directions):
        for i in range(len(bbnames) - 1):
            bb1 = bbnames[i]
            bb2 = bbnames[i + 1]
            dirn1 = directions[i]
            rev = dirn1[1] == "N"
            if bbs[i] is bbs[i + 1]:
                bbpairs.update((a, a) for a in bb1)
            else:
                bbpairs.update(
                    (b, a) if rev else (a, b) for a in bb1 for b in bb2)
        precompute_splicedb(db,
                            bbpairs,
                            verbosity=verbosity,
                            parallel=parallel,
                            **kw)
    if precache_only:
        return bbs

    verts = [None] * len(queries)
    edges = [None] * len(queries[1:])
    if source:
        srcdirn = [
            "".join("NC_"[d] for d in source.verts[i].dirn)
            for i in range(len(source.verts))
        ]  # yapf: disable
        srcverts, srcedges = list(), list()
        for i, bb in enumerate(bbs):
            for isrc, bbsrc in enumerate(source.bbs):

                # fragile code... detecting this way can be wrong
                # print(i, isrc, directions[i], srcdirn[isrc])
                if directions[i] != srcdirn[isrc]:
                    continue
                if [b.filehash for b in bb] == [b.filehash for b in bbsrc]:
                    # super hacky fix, really need to be passed info on what's what
                    if srcverts and srcverts[-1] + 1 != isrc:
                        continue
                    verts[i] = source.verts[isrc]
                    srcverts.append(isrc)

        for i, bb in enumerate(zip(bbs, bbs[1:])):
            bb0, bb1 = bb
            for isrc, bbsrc in enumerate(zip(source.bbs, source.bbs[1:])):
                bbsrc0, bbsrc1 = bbsrc
                if directions[i] != srcdirn[isrc]:
                    continue
                if directions[i + 1] != srcdirn[isrc + 1]:
                    continue
                he = [b.filehash for b in bb0] == [b.filehash for b in bbsrc0]
                he &= [b.filehash for b in bb1] == [b.filehash for b in bbsrc1]
                if not he:
                    continue
                edges[i] = source.edges[isrc]
                srcedges.append(isrc)

    if not make_edges:
        edges = []

    tvertex = time()
    exe = InProcessExecutor()

    if parallel:
        exe = cf.ThreadPoolExecutor(max_workers=parallel)
    with exe as pool:
        if only_seg is not None:
            save = bbs, directions
            bbs = [bbs[only_seg]]
            directions = [directions[only_seg]]
            verts = [verts[only_seg]]
        futures = list()
        for i, bb in enumerate(bbs):
            dirn = directions[i]
            if verts[i] is None:
                futures.append(
                    pool.submit(Vertex, bb, dirn, min_seg_len=min_seg_len))
        verts_new = [f.result() for f in futures]
        isnone = [i for i in range(len(verts)) if verts[i] is None]
        for i, inone in enumerate(isnone):
            verts[inone] = verts_new[i]
            if source:
                print('use new vertex', inone)
        if only_ivertex:
            # raise NotImplementedError
            print("!!!!!!! using one ivertex !!!!!", only_ivertex, len(verts),
                  [v.len for v in verts])
            if len(only_ivertex) != len(verts):
                print(
                    "NOT altering verts, len(only_ivertex)!=len(verts) continuing...",
                    "this is ok if part of a sub-protocol")
            else:
                for i, v in enumerate(verts):
                    if v.len > 1:  # could already have been "trimmed"
                        assert only_ivertex[i] < v.len
                        v.reduce_to_only_one_inplace(only_ivertex[i])
                    # print('x2exit', v.x2exit.shape)
                    # print('x2orig', v.x2orig.shape)
                    # print('ires', v.ires.shape)
                    # print('isite', v.isite.shape)
                    # print('ichain', v.ichain.shape)
                    # print('ibblock', v.ibblock.shape)
                    # print('inout', v.inout.shape, v.inout[10:])
                    # print('inbreaks', v.inbreaks.shape, v.inbreaks[10:])
                    # print('dirn', v.dirn.shape)
                    # # assert 0
        # print(i, len(verts_new), len(verts))
        if isnone:
            assert i + 1 == len(verts_new)
        assert all(v for v in verts)
        if only_seg is not None:
            verts = [None] * only_seg + verts + [None] * (len(queries) -
                                                          only_seg - 1)
            bbs, directions = save
    tvertex = time() - tvertex
    # info(
    # f'vertex creation time {tvertex:7.3f} num verts ' +
    # str([v.len if v else 0 for v in verts])
    # )

    if make_edges:
        tedge = time()
        for i, e in enumerate(edges):
            if e is not None:
                continue
            edges[i], edge_analysis = Edge(
                verts[i],
                bbs[i],
                verts[i + 1],
                bbs[i + 1],
                splicedb=spdb,
                verbosity=verbosity,
                precache_splices=precache_splices,
                **kw,
            )
            allok = all(x[6] for x in edge_analysis)
            if allok:
                continue
            print("=" * 80)
            print("info for edges with no valid splices",
                  edges[i].total_allowed_splices())
            for tup in edge_analysis:
                iblk0, iblk1, ofst0, ofst1, ires0, ires1 = tup[:6]
                ok, f_clash, f_rms, f_ncontact, f_ncnh, f_nhc = tup[6:12]
                m_rms, m_ncontact, m_ncnh, m_nhc = tup[12:]
                if ok:
                    continue
                assert len(bbs[i + 0]) > iblk0
                assert len(bbs[i + 1]) > iblk1
                print("=" * 80)
                print("egde Bblock A", bytes(bbs[i][iblk0].file))
                print("egde Bblock B", bytes(bbs[i + 1][iblk1].file))
                print(
                    f"bb {iblk0:3} {iblk1:3}",
                    f"ofst {ofst0:4} {ofst1:4}",
                    f"resi {ires0.shape} {ires1.shape}",
                )
                print(
                    f"clash_ok {int(f_clash*100):3}%",
                    f"rms_ok {int(f_rms*100):3}%",
                    f"ncontact_ok {int(f_ncontact*100):3}%",
                    f"ncnh_ok {int(f_ncnh*100):3}%",
                    f"nhc_ok {int(f_nhc*100):3}%",
                )
                print(
                    f"min_rms {m_rms:7.3f}",
                    f"max_ncontact {m_ncontact:7.3f}",
                    f"max_ncnh {m_ncnh:7.3f}",
                    f"max_nhc {m_nhc:7.3f}",
                )
            print("=" * 80)
            fok = np.stack([x[7:12] for x in edge_analysis]).mean(axis=0)
            rmsmin = np.array([x[12] for x in edge_analysis]).min()
            fmx = np.stack([x[13:] for x in edge_analysis]).max(axis=0)
            print(f"{' SPLICE FAIL SUMMARY ':=^80}")
            print(f"splice clash ok               {int(fok[0]*100):3}%")
            print(f"splice rms ok                 {int(fok[1]*100):3}%")
            print(f"splice ncontacts ok           {int(fok[2]*100):3}%")
            print(f"splice ncontacts_no_helix ok  {int(fok[3]*100):3}%")
            print(f"splice nhelixcontacted ok     {int(fok[4]*100):3}%")
            print(f"min rms of any failing        {rmsmin}")
            print(
                f"max ncontact of any failing   {fmx[0]} (maybe large for non-5-helix splice)"
            )
            print(
                f"max ncontact_no_helix         {fmx[1]} (will be 999 for non-5-helix splice)"
            )
            print(
                f"max nhelix_contacted          {fmx[2]} (will be 999 for non-5-helix splice)"
            )
            print("=" * 80)
            assert edges[i].total_allowed_splices() > 0, "invalid splice"
        tedge = time() - tedge
        if print_edge_summary:
            _print_edge_summary(edges)
        # info(
        # f'edge creation time {tedge:7.3f} num splices ' +
        # str([e.total_allowed_splices()
        # for e in edges]) + ' num exits ' + str([e.len for e in edges])
        # )
        spdb.sync_to_disk()

    toret = SearchSpaceDag(criteria.bbspec, bbs, verts, edges)
    if timing:
        toret = toret, tdb, tvertex, tedge
    return toret
Esempio n. 51
0
def get_group_cached_ts(group_file):

    ret_dict = {}

    have_axis = None
    have_dist = None
    have_ms = None

    for each_group_file in group_file:
        print('Loading group file %s' % each_group_file)
        all_data = cPickle.load(open(each_group_file, 'rb'))  # pickle files must be opened in binary mode

        max_kNN_len = all_data['max_kNN_len']
        ret_dict = update_cached_dict(ret_dict,
                                      'max_kNN_len',
                                      max_kNN_len,
                                      update_type='update')

        kNN_list = all_data['kNN_list']
        kNN_list = np.asarray(kNN_list)
        ret_dict = update_cached_dict(ret_dict, 'kNN_list', kNN_list)

        kNN_valid_flag = all_data['kNN_valid_flag']
        kNN_valid_flag = np.asarray(kNN_valid_flag)
        ret_dict = update_cached_dict(ret_dict, 'kNN_valid_flag',
                                      kNN_valid_flag)

        ret_dict = update_cached_dict(ret_dict, 'mult_mat',
                                      all_data['mult_mat'])
        ret_dict = update_cached_dict(ret_dict, 'mult_mat_space',
                                      all_data['mult_mat_space'])
        ret_dict = update_cached_dict(ret_dict, 'mult_mat_rev',
                                      all_data['mult_mat_rev'])
        ret_dict = update_cached_dict(ret_dict, 'num_p_rep',
                                      all_data['num_p_rep'])
        ret_dict = update_cached_dict(ret_dict, 'grav_flag',
                                      all_data['grav_flag'])

        depth_list, all_father_list = get_depth_father_list(all_data)
        max_depth = np.max(depth_list)
        ret_dict = update_cached_dict(ret_dict,
                                      'max_depth',
                                      max_depth,
                                      update_type='update')
        ret_dict = update_cached_dict(ret_dict, 'depth_list', depth_list)
        ret_dict = update_cached_dict(ret_dict, 'father_list', all_father_list)

        drt_father_list = get_drt_father_list(all_data)
        ret_dict = update_cached_dict(ret_dict, 'drt_father_list',
                                      drt_father_list)

        no, no_wo_group = all_data['mult_mat'].shape
        no_wo_group_flag = np.arange(no) < no_wo_group
        ret_dict = update_cached_dict(ret_dict, 'no_wo_group_flag',
                                      no_wo_group_flag)

        if have_axis:
            assert 'all_axis' in all_data, \
                "All group files should include all_axis! %s" % each_group_file
        if 'all_axis' in all_data:
            have_axis = True
            ret_dict = update_cached_dict(ret_dict, 'all_axis',
                                          all_data['all_axis'])
        else:
            have_axis = False

        if have_dist:
            assert 'all_dist' in all_data, \
                "All group files should include all_dist! %s" % each_group_file
        if 'all_dist' in all_data:
            have_dist = True
            ret_dict = update_cached_dict(ret_dict, 'all_dist',
                                          all_data['all_dist'])
        else:
            have_dist = False

        if have_ms:
            assert 'L2H_attribute' in all_data, \
                "All group files should include super node! %s" % each_group_file
        if 'L2H_attribute' in all_data:
            have_ms = True
            for key_now in [
                    'L2H_attribute', 'WG_attribute', 'H2L_attribute',
                    'L2H_division', 'WG_division'
            ]:
                ret_dict = update_cached_dict(ret_dict, key_now,
                                              all_data[key_now])
        else:
            have_ms = False

    ret_dict = pad_to_tensors(ret_dict)

    return ret_dict
Esempio n. 52
0
 def prepared(self):
     self.trainset = cPickle.load(
         open(self.root + 'item_user_recrods.pkl',
              'rb'))  # user u : items the user has already seen : rating
     self.news_sim_mat = cPickle.load(
         open(self.root + 'item_sim_mat.pkl', 'rb'))  # news m1 : news m2 : similarity
Esempio n. 53
0
def _load_pickled(filepath):
    with open(filepath, 'rb') as f:
        data = cPickle.load(f, encoding='latin-1')
    return data
Esempio n. 54
0
def unpickled(filename):
    #assert os.path.isdir(filename)
    assert os.path.isfile(filename)
    with open(filename, 'rb') as fo:
        dict = cPickle.load(fo)
    return dict
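Esempio n. 53 and n. 54 differ in one detail worth noting: _load_pickled passes encoding='latin-1', which lets Python 3 read pickles written by Python 2 (e.g. CIFAR-style batches), while unpickled uses the default encoding and can fail with a UnicodeDecodeError on such files. A hedged sketch of the safer variant:

import _pickle as cPickle

def unpickle_py2_compat(filename):
    # encoding='latin-1' (or 'bytes') lets Python 3 read pickles produced by Python 2
    with open(filename, 'rb') as fo:
        return cPickle.load(fo, encoding='latin-1')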
Esempio n. 55
0
# Imports assumed for this excerpt (aliases inferred from usage below):
import os
import cv2
import face_recognition as f
import _pickle as c


def display(loc, dpname):
    top, right, bottom, left = loc
    cv2.rectangle(frame, (left * 4, top * 4), (right * 4, bottom * 4), (0, 0, 255), 2)
    cv2.rectangle(frame, (left * 4, bottom * 4 - 35),
                  (right * 4, bottom * 4), (0, 0, 255), cv2.FILLED)
    font = cv2.FONT_HERSHEY_DUPLEX
    cv2.putText(frame, dpname, (left * 4 + 6, bottom * 4 - 6), font, 1.0, (255, 255, 255), 1)

faces = {}
for face in os.listdir("faces/"):
    if not face.startswith("."):
        with open("faces/" + face, 'rb') as fp:
            face_info = c.load(fp)
            faces[face] = {}
            faces[face]["info"] = face_info
            faces[face]["name"] = face

cam = cv2.VideoCapture(0)
while True:
    _, frame = cam.read()
    sframe = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    sframe = sframe[:, :, ::-1]
    face_locations = f.face_locations(sframe)
    for loc in face_locations:
        dpname = "unknown"
        face_enc = f.face_encodings(sframe, [loc])[0]
        for face in faces:
            match = f.compare_faces([faces[face]["info"]], face_enc, tolerance=0.5)
Esempio n. 56
0
def main(args):
    def simulate_sorted_idxes(test_batch_num):
        if (test_batch_num + 1) * batch_size <= len(validation):
            end_num = (test_batch_num + 1) * batch_size
        else:
            end_num = len(validation)
            #batch_size = end_num - batch_num*batch_size
        sorted_idxes = sorted(
            validation[test_batch_num * batch_size:end_num],
            key=lambda idx: len(
                sent2idxtensor(tokenized_eng_sentences[idx], idx)),
            reverse=True)
        return sorted_idxes

    def syntax_bleu_acc(pairs_dict, sorted_idexes_dict):
        acc_list = []
        bleu_list = []
        for k, pairs_list in pairs_dict.items():
            acc = 0
            for idx, tup in enumerate(pairs_list):
                tp1, tp2 = tup[0], tup[1]
                idx_of_binary = sorted_idexes_dict[k][idx]
                assert len(tp1) == len(tp2), k
                assert len([0] + lf_binary_entsRAW[idx_of_binary] +
                           [0]) == len(tp1), "tp1: " + str(
                               tp1) + " , " + "binary : " + str(
                                   lf_binary_entsRAW[idx_of_binary])
                np_binary = -(
                    np.array([0] + lf_binary_entsRAW[idx_of_binary] + [0]) - 1)
                tp1, tp2 = np.array(tp1) * np_binary, np.array(tp2) * np_binary
                acc += list(tp1) == list(tp2)
                bleu = sentence_bleu([list(tp2)], tp1)
                bleu_list.append(bleu)
            acc = acc / len(pairs_list)
            acc_list.append(acc)
        return acc_list, bleu_list

    global split_num
    global shuffle_scheme

    lf_binary_entsRAW = cPickle.load(open("data/raw_lf_binary_ent.p", "rb"))

    split_num = args.split_num
    shuffle_scheme = args.shuffle_scheme
    batch_size = 32
    exec(
        open('data_prep/data_prepRAW_Shuffle.py').read(), globals(), globals())

    sorted_idexes_dict = {}
    test_batch_num = 0
    while (test_batch_num) * batch_size < len(validation):
        sorted_idexes_dict[test_batch_num +
                           1] = simulate_sorted_idxes(test_batch_num)
        test_batch_num += 1
        batch_size = 32

    directory = "outputs/" + args.loading_dir + "/validation_results"
    file_name = directory + "/validation_result.p"
    dict_pairs = cPickle.load(open(file_name, "rb"))
    try:
        tr_pairs = dict_pairs['translation_pairs']
    except:
        tr_pairs = dict_pairs['pairs_dict']['translation_pairs']
    tr_pairs = clean_pairs(tr_pairs)
    syntax_acc_list = syntax_bleu_acc(tr_pairs, sorted_idexes_dict)
    print("syntax acc is : ", np.mean(syntax_acc_list[0]))
    print("bleu mean is : ", np.mean(syntax_acc_list[1]))
    cPickle.dump(syntax_acc_list[1], open(directory + "/bleu_list.p", "wb"))
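syntax_bleu_acc above scores each translation pair with sentence_bleu, presumably nltk's implementation (the import sits outside this excerpt). A small reminder of its argument order, which is easy to get backwards: the first argument is a list of reference token lists, the second a single hypothesis token list.

from nltk.translate.bleu_score import sentence_bleu

reference = [['the', 'cat', 'sat', 'on', 'the', 'mat']]  # list of reference token lists
hypothesis = ['the', 'cat', 'sat', 'on', 'the', 'mat']   # one hypothesis token list
print(sentence_bleu(reference, hypothesis))  # 1.0 for an exact match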
Esempio n. 57
0
# for student, points in student_points.items():
#     if points >= limit:
#         passed.append(student)
# set(passed)

solution_student.append(None)

# Assignment 20:
# a = [1, 2, 3, 4]
# b = a
# b.append(80)
# a

solution_student.append(None)


if __name__ == '__main__':
    import _pickle
    with open('./solution/solution_2.pkl', 'rb') as solution_file:
        solution_tutors = _pickle.load(solution_file)
    if solution_tutors == solution_student:
        print('Solved!')
    else:
        false_answers = []
        for index, answer in enumerate(solution_student):
            if answer != solution_tutors[index]:
                false_answers.append(str(index + 1))
            else:
                pass
        print('Try Again! Answer(s) for the assignment(s) {} are wrong'.format(', '.join(false_answers)))
Esempio n. 58
0
def load(file_path):
    f = open(file_path, 'rb')
    model = pickle.load(f)
    f.close()
    return model
def unpickle(file):
    with open(file, "rb") as fo:
        return cPickle.load(fo, encoding="latin1")
                                           'filters'),
                      help='path to the output filters directory',
                      metavar='FILTERS_PATH')
    parser.add_option(
        '-c',
        '--count0',
        dest='count0',
        default='4',
        help=('number of chunks to extract from the first convolutional ' +
              'layer, this number is halved for each next layer'),
        metavar='COUNT0')
    options, args = parser.parse_args()

    model = build_weighted_model(options.weights_path)

    pickle_data_0 = pickle.load(open('../ai-data/data_part0.pkl', 'rb'))
    pickle_data_1 = pickle.load(open('../ai-data/data_part1.pkl', 'rb'))
    pickle_data_2 = pickle.load(open('../ai-data/data_part2.pkl', 'rb'))
    pickle_data_3 = pickle.load(open('../ai-data/data_part3.pkl', 'rb'))

    pickle_data_concat = {
        'x':
        np.concatenate((pickle_data_0['x'], pickle_data_1['x'],
                        pickle_data_2['x'], pickle_data_3['x'])),
        'y':
        np.concatenate((pickle_data_0['y'], pickle_data_1['y'],
                        pickle_data_2['y'], pickle_data_3['y']))
    }

    extract_filters(model, pickle_data_concat, options.filters_path,
                    int(options.count0))