def __init__(self,
             input_size: int = INPUT_SIZE,
             output_size: int = OUTPUT_SIZE,
             hidden_size: int = HIDDEN_SIZE,
             embed_size: int = EMBED_SIZE,
             lr: float = LEARNING_RATE,
             clip_grad: float = CLIP_GRAD,
             init_range: float = INIT_RANGE):
    """Create the input-side and output-side layer stacks and store the sizes.

    The input stack is ``[Embedding, LSTM]``; the output stack is
    ``[Embedding, LSTM, Softmax]``, where the output LSTM receives the input
    LSTM through its ``previous`` argument.

    Args:
        input_size: first argument of the input-side ``Embedding``.
        output_size: first argument of the output-side ``Embedding`` and
            second argument of ``Softmax``.
        hidden_size: second argument of both ``LSTM`` layers and first
            argument of ``Softmax``.
        embed_size: second argument of both ``Embedding`` layers and first
            argument of both ``LSTM`` layers.
        lr: stored as ``self.lr``.
        clip_grad: stored as ``self.clip_grad``.
        init_range: forwarded to every layer constructor (not stored).
    """
    # Layers are created in the same order as before so that any
    # construction-time side effects happen in the same sequence.
    source_embedding = Embedding(input_size, embed_size, init_range)
    source_lstm = LSTM(embed_size, hidden_size, init_range)
    target_embedding = Embedding(output_size, embed_size, init_range)
    target_lstm = LSTM(embed_size, hidden_size, init_range,
                       previous=source_lstm)
    target_softmax = Softmax(hidden_size, output_size, init_range)

    self.input_layers = [source_embedding, source_lstm]
    self.output_layers = [target_embedding, target_lstm, target_softmax]

    self.hidden_size = hidden_size
    self.embed_size = embed_size
    self.input_size = input_size
    self.output_size = output_size
    self.lr = lr
    self.clip_grad = clip_grad
def get_image_feature(img_path, img_list_path, model_path, epoch, gpu_id):
    """Compute an embedding and a faceness score for each listed image.

    Every line of the list file is split on single spaces. The first field
    is joined onto ``img_path`` to locate the image, the last field is kept
    as the faceness score, and the fields in between are parsed as floats and
    reshaped to ``(5, 2)`` before being passed to ``embedding.get``.

    Processing stops when the line index reaches 2000, so at most the first
    2000 entries are handled.

    Args:
        img_path: directory that the image names in the list are relative to.
        img_list_path: path of the text file listing the images.
        model_path: forwarded to ``Embedding``.
        epoch: forwarded to ``Embedding``.
        gpu_id: forwarded to ``Embedding``.

    Returns:
        A tuple ``(img_feats, faceness_scores)`` of ``float32`` numpy arrays.
    """
    # Read the whole list inside a context manager so the file handle is
    # released immediately (it was previously left open).
    with open(img_list_path) as img_list:
        files = img_list.readlines()
    embedding = Embedding(model_path, epoch, gpu_id)
    print('files:', len(files))

    faceness_scores = []
    img_feats = []
    for img_index, each_line in enumerate(files):
        if img_index % 500 == 0:
            print('processing', img_index)
        if img_index == 2000:
            break
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        img_feats.append(embedding.get(img, lmk))
        faceness_scores.append(name_lmk_score[-1])

    img_feats = np.array(img_feats).astype(np.float32)
    faceness_scores = np.array(faceness_scores).astype(np.float32)
    return img_feats, faceness_scores
def build_model(vocab: Vocabulary, args, **kwargs) -> Model:
    """Assemble the embedder, CNN encoder and optional regularizer into a model.

    Args:
        vocab: vocabulary; its ``"tokens"`` namespace size sets the number of
            embeddings.
        args: options object; ``pretrained_WE_path`` and ``use_reg`` are read.
        **kwargs: forwarded unchanged to ``MortalityClassifier``.

    Returns:
        The constructed ``MortalityClassifier``.
    """
    print("Building the model")
    EMBED_DIMS = 200

    # Token embedding: always EMBED_DIMS wide; a pretrained file (and the
    # vocabulary needed to read it) is added only when a path is configured.
    token_embedding_kwargs = {
        "embedding_dim": EMBED_DIMS,
        "num_embeddings": vocab.get_vocab_size("tokens"),
    }
    if args.pretrained_WE_path:
        token_embedding_kwargs["pretrained_file"] = args.pretrained_WE_path
        token_embedding_kwargs["vocab"] = vocab
    embedder = BasicTextFieldEmbedder(
        {"tokens": Embedding(**token_embedding_kwargs)})

    # num_filters applies to EACH n-gram size, so it should be chosen with
    # care.
    encoder = CnnEncoder(embedding_dim=EMBED_DIMS,
                         ngram_filter_sizes=(2, 3, 5),
                         num_filters=5)

    # One shared L2 regularizer for the three parameter-name patterns, or no
    # regularization at all.
    if args.use_reg:
        l2_reg = L2Regularizer()
        regularizer_applicator = RegularizerApplicator(
            [(pattern, l2_reg)
             for pattern in ("embedder", "encoder", "classifier")])
    else:
        regularizer_applicator = None

    return MortalityClassifier(vocab, embedder, encoder,
                               regularizer_applicator, **kwargs)
def __init__(self, mode=None, tuner=None, model_name="lgb") -> None:
    """Load stop words, embeddings, the label mapping and the data for ``mode``.

    Args:
        mode: ``'train'`` or ``'predict'``; a falsy value means ``'train'``.
        tuner: optional; when truthy it must be ``'bayes'`` or ``'grid'``.
        model_name: stored as ``self.model_name``.

    Raises:
        AssertionError: if ``mode`` or ``tuner`` has an unsupported value.
    """
    # Files are read inside context managers so the handles are closed as
    # soon as their content has been consumed (they used to stay open).
    with open(config.stopwords, encoding='utf-8', mode='r') as stopwords_file:
        self.stopWords = [line.strip() for line in stopwords_file]

    self.embedding = Embedding()
    self.embedding.load()

    with open(config.label2id_file, encoding='utf-8') as label_file:
        self.labelToIndex = json.load(label_file)
    # Inverse mapping: index -> label.
    self.ix2label = {v: k for k, v in self.labelToIndex.items()}

    self.mode = mode if mode else 'train'
    self.tuner = tuner
    assert self.mode in ['train', 'predict']
    if self.tuner:
        assert self.tuner in ['bayes', 'grid']

    def _read_tsv(path):
        # Tab-separated file with incomplete rows dropped and a fresh index.
        return pd.read_csv(path, sep='\t').dropna().reset_index(drop=True)

    if self.mode == "train":
        self.train_data = _read_tsv(config.train_data_file)
        self.dev_data = _read_tsv(config.eval_data_file)
    else:
        self.test_data = _read_tsv(config.test_data_file)

    self.exclusive_col = ['text', 'lda', 'bow', 'label']
    self.model = None
    self.model_name = model_name
def main():
    """
    Test method for this class: takes a patient ID as the first command-line
    argument and prints out the lengths of each session that was loaded.

    After printing, it runs an STFT on the pre-test session, trains a "pca"
    embedding on the pre-test examples, extracts alpha waves, plots the first
    ten windows for six channels, and finally drops into the debugger.
    """
    pid = sys.argv[1]
    patient = Patient(pid)
    if patient.pre_test is not None:
        print("season start: {}".format(len(patient.pre_test.raw)))
    for i in range(len(patient.intermediate_tests)):
        print("concussion {}: {}".format(
            i, len(patient.intermediate_tests[i].raw)))
    print("season end: {}".format(len(patient.post_test.raw)))
    prep.stft(patient.pre_test)
    examples = patient.pre_test.get_examples()
    emb = Embedding("pca")
    emb.train(examples)
    # The embedded examples are computed but not used further in this function.
    emb_examples = emb.embed(examples)
    #patient.season_start.extract_windows()
    #patient.season_start.plot_windows(windows=np.arange(10), channels=["c3", "cz", "c4", "p3", "pz", "p4"])
    prep.extractWaves(patient.pre_test, n=4001, samplingRate=256, wave='alpha')
    patient.pre_test.extract_windows()
    patient.pre_test.plot_windows(
        windows=np.arange(10), channels=["c3", "cz", "c4", "p3", "pz", "p4"])
    #patient.season_start.plot_channels(channels=["c3", "cz", "c4", "p3", "pz", "p4"], end=256)
    # Stop in the debugger so the loaded patient can be inspected interactively.
    import pdb
    pdb.set_trace()
def create_representation(args):
    """Build the representation selected by the command-line style ``args``.

    Keys read from ``args``: ``'<representation>'``,
    ``'<representation_path>'``, ``'--w+c'`` and ``'--eig'`` (converted to
    float).

    * ``'PPMI'`` -> ``Explicit.load``; combining with ``--w+c`` is rejected.
    * ``'SVD'``  -> ``SVDEmbedding``, or an ``EnsembleEmbedding`` of two of
      them when ``--w+c`` is set.
    * anything else -> ``Embedding.load``, or an ``EnsembleEmbedding`` of the
      ``.words`` and ``.contexts`` files when ``--w+c`` is set.

    Raises:
        Exception: for ``'PPMI'`` together with ``--w+c``.
    """
    rep_type = args['<representation>']
    path = args['<representation_path>']
    w_c = args['--w+c']
    eig = float(args['--eig'])

    if rep_type == 'PPMI':
        if not w_c:
            return Explicit.load(path, True)
        raise Exception('w+c is not implemented for PPMI.')

    if rep_type == 'SVD':
        if not w_c:
            return SVDEmbedding(path, True, eig)
        first_part = SVDEmbedding(path, False, eig, False)
        second_part = SVDEmbedding(path, False, eig, True)
        return EnsembleEmbedding(first_part, second_part, True)

    # Every other representation type is loaded as a plain Embedding.
    if not w_c:
        return Embedding.load(path, True)
    words_part = Embedding.load(path + '.words', False)
    contexts_part = Embedding.load(path + '.contexts', False)
    return EnsembleEmbedding(words_part, contexts_part, True)
def __init__(self,
             logger=Log(print),
             embedding_file='data/wiki-news-300d-1M.vec',
             bots_file='data/bots_tweets.txt',
             human_file='data/human_tweets.txt',
             validation_split=0.2,
             test_split=0.2,
             batch_size=50,
             epochs=25,
             additional_feats_enabled=True,
             custom_callback=None,
             early_stopping=5,
             dataset_config=DatasetConfig.USER_STATE):
    """Store the training configuration and create the dataset and embedding helpers.

    ``dataset_config`` is unpacked as a pair; its second element is kept as
    ``self.dataset_config_name``. The model itself is not built here
    (``self.model`` starts as ``None``).
    """
    # Collaborators (created in this order: dataset builder, then embedding).
    self.dataset = DatasetBuilder(logger, dataset_config)
    _unused_first, config_name = dataset_config
    self.dataset_config_name = config_name
    self.logger = logger
    self.custom_callback = custom_callback
    self.embedding = Embedding(logger, embedding_file)
    self.model = None  # built later

    # Training options.
    self.additional_feats_enabled = additional_feats_enabled
    self.batch_size = batch_size
    self.epochs = epochs
    self.early_stopping = early_stopping
    self.validation_split = validation_split
    self.test_split = test_split

    # Input files.
    self.bots_file = bots_file
    self.human_file = human_file

    # Containers that start out empty.
    self.x_bot_tweets = []
    self.bot_tweets = []
    self.bot_test_tweets = []
    self.doc_test_tweets = []
    self.labels_test = []
def create_representation(args):
    """Build the representation selected by the command-line style ``args``.

    Keys read from ``args``: ``'<representation>'``,
    ``'<representation_path>'``, ``'--neg'`` (int), ``'--w+c'``, ``'--eig'``
    (float) and ``'--normalize'``.

    * ``'PPMI'`` -> ``PositiveExplicit``; combining with ``--w+c`` is rejected.
    * ``'SVD'``  -> ``SVDEmbedding``, or an ``EnsembleEmbedding`` of two of
      them when ``--w+c`` is set.
    * anything else -> ``Embedding`` on the ``.words`` file, or an
      ``EnsembleEmbedding`` of the ``.words`` and ``.contexts`` files when
      ``--w+c`` is set.

    Raises:
        Exception: for ``'PPMI'`` together with ``--w+c``.
    """
    rep_type = args['<representation>']
    path = args['<representation_path>']
    neg = int(args['--neg'])
    w_c = args['--w+c']
    eig = float(args['--eig'])
    normalize = args['--normalize']

    if rep_type == 'PPMI':
        if not w_c:
            return PositiveExplicit(path, normalize, neg)
        raise Exception('w+c is not implemented for PPMI.')

    if rep_type == 'SVD':
        if not w_c:
            return SVDEmbedding(path, normalize, eig)
        first_part = SVDEmbedding(path, normalize, eig, False)
        second_part = SVDEmbedding(path, normalize, eig, True)
        return EnsembleEmbedding(first_part, second_part, normalize)

    # Every other representation type is read as a plain Embedding.
    if not w_c:
        return Embedding(path + '.words', normalize)
    words_part = Embedding(path + '.words', normalize)
    contexts_part = Embedding(path + '.contexts', normalize)
    return EnsembleEmbedding(words_part, contexts_part, normalize)
def dimension_afn(x):
    """Run ``Embedding.afn`` on ``x`` for dimensions 1..21 and plot the ratios.

    ``afn`` is called with ``tau=138``, ``window=45`` and the Chebyshev
    metric. For each of its two outputs, the ratio of every element to its
    predecessor is computed and handed to ``plot_afn``.
    """
    embedder = Embedding(x)
    max_dim = 20
    dims = np.arange(1, max_dim + 2)

    E, Es = embedder.afn(x, tau=138, dim=dims, window=45,
                         metric='chebyshev')

    # Successive ratios: value at position d+1 divided by value at position d.
    ratio_E = E[1:] / E[:-1]
    ratio_Es = Es[1:] / Es[:-1]
    embedder.plot_afn(dims, ratio_E, ratio_Es)
def __init__(self,
             char_dict_size: int = 10,
             char_embedding_dim: int = 8,
             word_dict_size: int = 10,
             word_embedding_dim: int = 300,
             char_filter_dim: int = 100,
             n_gram_sizes: Tuple[int, ...] = (5,),
             rnn_dim: int = 100,
             keep_prob: float = .8,
             bidirectional: bool = True):
    """Create the sub-modules of the model.

    Args:
        char_dict_size: first argument of the character ``Embedding``.
        char_embedding_dim: second argument of the character ``Embedding``
            and input size of the character CNN encoder.
        word_dict_size: first argument of the word ``Embedding``.
        word_embedding_dim: second argument of the word ``Embedding``.
        char_filter_dim: second argument of the character CNN encoder.
        n_gram_sizes: n-gram sizes handed to the character CNN encoder. The
            default is now an immutable tuple, matching the annotation,
            instead of a list shared between calls.
        rnn_dim: second argument of both ``RNNBaseModule`` instances.
        keep_prob: forwarded to both ``RNNBaseModule`` instances (a float;
            the previous ``bool`` annotation was incorrect).
        bidirectional: forwarded to both ``RNNBaseModule`` instances.
    """
    super(Bidaf, self).__init__()

    # Width of the concatenated character-CNN and word-embedding features.
    combined_dim = char_filter_dim + word_embedding_dim

    self.char_embedding = Embedding(char_dict_size, char_embedding_dim)
    self.word_embedding = Embedding(word_dict_size, word_embedding_dim)
    self.char_cnn_encoder = CNNEncoder(char_embedding_dim, char_filter_dim,
                                       n_gram_sizes)
    self.highway = Highway(combined_dim, num_layers=2)
    self.contextual_embedding = RNNBaseModule(combined_dim,
                                              rnn_dim,
                                              keep_prob=keep_prob,
                                              bidirectional=bidirectional)
    self.model_layers = RNNBaseModule(rnn_dim * 2,
                                      rnn_dim,
                                      num_layers=2,
                                      keep_prob=keep_prob,
                                      bidirectional=bidirectional)
    self.output_module = MultiDimLinear(rnn_dim * 2, 1)
def create_representation(args):
    """Build the representation selected by the command-line style ``args``.

    Keys read from ``args``: ``'<representation>'``,
    ``'<representation_path>'``, ``'--neg'`` (int), ``'--w+c'`` and
    ``'--eig'`` (float).

    Supported types are ``'PPMI'``, ``'SVD'``, ``'SGNS'``,
    ``'discriminative'``, ``'discriminative_SGNS'`` and ``'projective'``.
    ``--w+c`` switches ``'SVD'`` and ``'SGNS'`` to an ``EnsembleEmbedding``
    and is rejected for ``'PPMI'``.

    Returns:
        The constructed representation, or ``None`` when the type is not one
        of the supported names.

    Raises:
        Exception: for ``'PPMI'`` together with ``--w+c``.
    """
    rep_type = args['<representation>']
    path = args['<representation_path>']
    neg = int(args['--neg'])
    w_c = args['--w+c']
    eig = float(args['--eig'])

    if rep_type == 'PPMI':
        if not w_c:
            return PositiveExplicit(path, True, neg)
        raise Exception('w+c is not implemented for PPMI.')

    if rep_type == 'SVD':
        if not w_c:
            return SVDEmbedding(path, True, eig)
        first_part = SVDEmbedding(path, False, eig, False)
        second_part = SVDEmbedding(path, False, eig, True)
        return EnsembleEmbedding(first_part, second_part, True)

    if rep_type == 'SGNS':
        if not w_c:
            return Embedding(path + '.words', True)
        words_part = Embedding(path + '.words', False)
        contexts_part = Embedding(path + '.contexts', False)
        return EnsembleEmbedding(words_part, contexts_part, True)

    if rep_type == 'discriminative':
        return discriminative_embedding(path, True, eig)
    if rep_type == 'discriminative_SGNS':
        return discriminative_SGNS(path, True)
    if rep_type == 'projective':
        return Projective_embedding(path)

    # Unrecognised type: nothing is built.
    return None
def main(model_num=1):
    """Train the chosen model variant and run prediction on the dev file.

    Steps: preprocess the train and dev files, hold out 20% of the training
    data for validation, one-hot encode the labels, load the embedding matrix
    and tokenizer, turn each text set into three sequence inputs, then build,
    summarise, train and finally call ``predict`` on the dev sequences.

    Args:
        model_num: forwarded to ``CustomModel``.
    """
    # Fixed settings for this run.
    MAX_SEQUENCE_LENGTH = 24
    LSTM_DIM = 64
    HIDDEN_LAYER_DIM = 30
    NUM_CLASSES = 4
    GAUSSIAN_NOISE = 0.1
    DROPOUT = 0.2
    DROPOUT_LSTM = 0.2
    BATCH_SIZE = 200  # defined but not passed to anything below

    loader = Preprocess()
    texts_train, labels_train = loader.preprocessData('../projet2/train.txt',
                                                      mode="train")
    texts_dev, labels_dev = loader.preprocessData('../projet2/dev.txt',
                                                  mode="train")

    X_train, X_val, y_train, y_val = train_test_split(texts_train,
                                                      labels_train,
                                                      test_size=0.2,
                                                      random_state=42)

    onehot_train = to_categorical(np.asarray(y_train))
    onehot_val = to_categorical(np.asarray(y_val))
    onehot_dev = to_categorical(np.asarray(labels_dev))  # not used below

    embedding = Embedding('../projet2/emosense.300d.txt')
    embedding_matrix = embedding.getMatrix()
    tokenizer = embedding.getTokenizer()

    # Each text set becomes three parallel sequence inputs.
    first_train, second_train, third_train = get_sequences(
        X_train, MAX_SEQUENCE_LENGTH, tokenizer)
    first_val, second_val, third_val = get_sequences(
        X_val, MAX_SEQUENCE_LENGTH, tokenizer)
    first_dev, second_dev, third_dev = get_sequences(
        texts_dev, MAX_SEQUENCE_LENGTH, tokenizer)

    model = CustomModel(model_num)
    model.build(embedding_matrix,
                MAX_SEQUENCE_LENGTH,
                LSTM_DIM,
                HIDDEN_LAYER_DIM,
                NUM_CLASSES,
                noise=GAUSSIAN_NOISE,
                dropout_lstm=DROPOUT_LSTM,
                dropout=DROPOUT)
    model.summary()

    history = model.train(first_train, second_train, third_train,
                          onehot_train,
                          first_val, second_val, third_val,
                          onehot_val)
    y_pred = model.predict([first_dev, second_dev, third_dev])
def main(model_dir):
    """Load the embedding from ``model_dir`` and print three sample queries.

    Both the numpy and torch random generators are seeded with 0 first. The
    queries printed are the nearest neighbours of 'god', the similarity of
    'god' and 'wickedly', and the top-5 analogy for ('god', 'love', 'satan'),
    followed by an empty line.
    """
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)

    model = Embedding(model_dir)

    neighbours = model.nearest_k('god')
    print(neighbours)
    score = model.similarity('god', 'wickedly')
    print(score)
    completions = model.analogy('god', 'love', 'satan', k=5)
    print(completions)
    print()
def __init__(self):
    """Instantiate each component with its default constructor, then print a success message."""
    # Components are created one after another in this fixed order.
    self.img_cropper = ImageCropper()
    self.geolocation_model = GeolocationEstimator()
    self.embedding_model = Embedding()
    self.entity_retriever = EntityRetriever()
    self.news_api = NewsArticlesApi()
    self.events_api = OekgEventsApi()
    # Printed only after every component above was constructed without error.
    print('Loaded GeoWINE successfully.')
def dimension_fnn(x):
    """Run ``Embedding.fnn`` on ``x`` for dimensions 1..20 and plot the result.

    ``fnn`` is called with ``tau=14``, ``window=10`` and the cityblock
    metric; its three outputs are passed straight to ``plot_fnn``.
    """
    embedder = Embedding(x)
    max_dim = 20
    dims = np.arange(1, max_dim + 1)

    fnn_curves = embedder.fnn(x, tau=14, dim=dims, window=10,
                              metric='cityblock')
    first, second, third = fnn_curves
    embedder.plot_fnn(dims, first, second, third)
def forward(self, xs):
    """Apply a fresh ``Embedding`` layer to every column of ``xs``.

    ``xs`` is unpacked as a 2-D array ``(N, T)`` and ``self.W`` as ``(V, D)``.
    For each of the ``T`` columns a new ``Embedding(self.W)`` is created, its
    ``forward`` result is written into the matching slice of the output, and
    the layer is remembered in ``self.layers``.

    Returns:
        An ``(N, T, D)`` array of dtype ``'f'``.
    """
    batch_size, n_steps = xs.shape
    _vocab_size, embed_dim = self.W.shape

    result = np.empty((batch_size, n_steps, embed_dim), dtype='f')
    self.layers = []

    for step in range(n_steps):
        step_layer = Embedding(self.W)
        result[:, step, :] = step_layer.forward(xs[:, step])
        self.layers.append(step_layer)

    return result
def test_embedding():
    """Check ``Embedding.compute`` on all-ones bits and ``pair_embed`` on two string pairs."""
    np.random.seed(42)

    # With every random bit set to one, only the leading '1' contributes.
    all_ones = np.ones(100, dtype=int)
    for bit_string, expected_sum in (("00001111", 0), ("10001111", 1)):
        assert np.sum(Embedding(bit_string, all_ones).compute()) == expected_sum

    # Seeded random bits for the pairwise checks.
    random_bits = np.random.randint(2, size=80)
    ten_ones = "1" * 10 + "0" * 5
    eleven_ones = "1" * 11 + "0" * 4
    assert pair_embed(ten_ones, eleven_ones, random_bits) == 1
    assert pair_embed("bad", "boy", random_bits) == 16
def _preprocess(self):
    """Clean ``self.data``, segment the questions and attach sentence embeddings.

    Rows with missing values are dropped in place, ``qid`` is cast to int,
    the vocabulary is generated, each question is cut with ``jieba`` and
    re-joined with spaces into ``qs_processed``, and every processed question
    is embedded into ``qs_embed``.
    """

    def _segment(question):
        # Space-separated jieba tokens.
        return ' '.join(jieba.cut(question))

    def _embed(processed):
        return self.embedding.sentence_embedding(processed.split())

    self.data.dropna(inplace=True)
    self.data['qid'] = self.data['qid'].astype(int)
    self._gen_vocab()

    self.data['qs_processed'] = self.data['question'].apply(_segment)
    self.embedding = Embedding(self.config['vocab_path'],
                               self.config['w2v_path'])

    # NOTE(review): this repeats the cast done above; it looks redundant but
    # is kept as-is.
    self.data['qid'] = self.data['qid'].astype(int)
    self.data['qs_embed'] = self.data['qs_processed'].apply(_embed)
def get_embedding_matrix_and_vectorizer(conversations):
    """Fit a vectorizer on ``conversations`` and build the embedding matrix for it.

    The vectorizer is limited to ``MAX_VOCAB_SIZE`` and its tokenizer is
    saved to ``TOKENIZER_PATH`` before the embedding is created from the
    vectorizer's ``word2idx`` mapping.

    Returns:
        A tuple ``(embedding_matrix, vectorizer)``.
    """
    from vectorize import Vectorize

    fitted_vectorizer = Vectorize(conversations, MAX_VOCAB_SIZE)
    vocabulary_index = fitted_vectorizer.word2idx
    fitted_vectorizer.save_tokenizer(TOKENIZER_PATH)

    from embedding import Embedding

    matrix = Embedding(vocabulary_index).get_embedding_matrix()
    return matrix, fitted_vectorizer
class TestEmbedding(unittest.TestCase):
    """Unit tests for ``Embedding`` built on a 7x3 weight matrix holding 0..20."""

    def setUp(self):
        weights = np.arange(21).reshape(7, 3)
        self.embedding = Embedding(weights)
        self.index = np.array([0, 2, 0, 4])

    def test_params(self):
        """The only parameter is the weight matrix itself."""
        (weights,) = self.embedding.params
        expected = np.arange(21).reshape(7, 3)
        assert_array_equal(expected, weights)

    def test_grads(self):
        """The only gradient starts out as zeros with the weight shape."""
        (gradient,) = self.embedding.grads
        expected = np.zeros((7, 3), dtype=int)
        assert_array_equal(expected, gradient)

    def test_forward(self):
        """Forward selects rows 0, 2, 0 and 4 of the weights."""
        selected = self.embedding.forward(self.index)
        expected = np.array([
            [0, 1, 2],
            [6, 7, 8],
            [0, 1, 2],
            [12, 13, 14],
        ])
        assert_array_equal(expected, selected)

    def test_backward(self):
        """Backward accumulates the upstream rows, summing repeated indices."""
        upstream = self.embedding.forward(self.index)
        self.embedding.backward(upstream)
        (gradient,) = self.embedding.grads
        # Row 0 was selected twice, so its gradient is doubled.
        expected = np.array([
            [0, 2, 4],
            [0, 0, 0],
            [6, 7, 8],
            [0, 0, 0],
            [12, 13, 14],
            [0, 0, 0],
            [0, 0, 0],
        ])
        assert_array_equal(expected, gradient)
def check_embedding_quality(conversations):
    """Return what ``check_coverage`` reports for the vocabulary of ``conversations``.

    A vectorizer limited to ``MAX_VOCAB_SIZE`` is fitted, an ``Embedding`` is
    built from its ``word2idx`` mapping, and the vectorizer's ``word_counts``
    are compared against the embedding vocabulary.

    Returns:
        The value returned by ``Embedding.check_coverage``.
    """
    from vectorize import Vectorize

    fitted_vectorizer = Vectorize(conversations, MAX_VOCAB_SIZE)
    vocabulary_index = fitted_vectorizer.word2idx

    from embedding import Embedding

    embedder = Embedding(vocabulary_index)
    corpus_counts = fitted_vectorizer.word_counts
    known_words = embedder.get_embedding_vocab()
    missing = embedder.check_coverage(corpus_counts, known_words)

    print('Collected oov words.')
    return missing
def raw_txt_to_embedding(embedding_file, content):
    """Tokenize ``content`` with spaCy and print its embedding.

    Args:
        embedding_file: forwarded to ``Embedding``.
        content: text passed through the spaCy pipeline.
    """
    # spaCy pipeline using the project tokenizer.
    pipeline = spacy.load('en', create_make_doc=PlangTokenizer)

    # Tokenize the text, then map the tokens to their embeddings.
    vectors = Embedding(embedding_file).words_to_embeddings(pipeline(content))

    # TODO: pipe this to a keras model
    print(vectors)
def __init__(self, n_token, n_layer, n_head, d_model, d_head, d_inner,
             dropout, dropatt, tie_weight=True, d_embed=None, div_val=1,
             tie_projs=(False,), pre_lnorm=False, tgt_len=None, ext_len=None,
             mem_len=None, cutoffs=(), adapt_inp=False, same_length=False,
             clamp_len=-1, sample_softmax=-1, demographics_len=0):
    """Build the embedding, decoder stack, positional embedding, loss and output layer.

    Args:
        n_token: first argument of the word ``Embedding`` and output width
            of the final linear layer.
        n_layer: number of ``DecoderLayer`` instances.
        n_head, d_model, d_head, d_inner: forwarded to each ``DecoderLayer``;
            ``d_model`` also sizes the positional embedding.
        dropout: dropout probability for ``self.drop`` and each layer.
        dropatt: forwarded to each ``DecoderLayer``.
        d_embed: embedding width; falls back to ``d_model`` when ``None``.
        pre_lnorm: forwarded to each ``DecoderLayer``.
        tgt_len, ext_len: stored; their sum becomes ``self.max_klen``.
        clamp_len: stored as ``self.clamp_len``.
        demographics_len: extra input width added to the final linear layer.
        tie_weight, div_val, tie_projs, mem_len, cutoffs, adapt_inp,
            same_length, sample_softmax: accepted but not used in this
            constructor. ``tie_projs`` and ``cutoffs`` now default to
            immutable tuples rather than lists shared across calls.
    """
    super(MemTransformerLM, self).__init__()
    self.n_token = n_token

    d_embed = d_model if d_embed is None else d_embed
    self.d_embed = d_embed
    self.d_model = d_model
    self.n_head = n_head
    self.d_head = d_head

    self.word_emb = Embedding(n_token, d_embed)
    self.drop = nn.Dropout(dropout)

    self.n_layer = n_layer
    self.tgt_len = tgt_len
    self.ext_len = ext_len
    # NOTE(review): with the default tgt_len=None / ext_len=None this
    # addition raises TypeError, so callers must pass both explicitly.
    self.max_klen = tgt_len + ext_len
    self.clamp_len = clamp_len

    self.layers = nn.ModuleList()
    for _ in range(n_layer):
        self.layers.append(
            DecoderLayer(n_head, d_model, d_head, d_inner, dropout,
                         dropatt=dropatt, pre_lnorm=pre_lnorm))

    self.pos_emb = PositionalEmbedding(self.d_model)
    self.loss = nn.BCEWithLogitsLoss()
    self.demographics_len = demographics_len
    self.fc = nn.Linear(self.d_embed + self.demographics_len,
                        self.n_token, bias=True)
    weights_init(self)
def __init__(
        self,
        dataset='data/185_baseball.csv',
        columns=None,
        tree='ontologies/class-tree_dbpedia_2016-10.json',
        embedding='models/wiki2vec/en.model',
        row_agg_func=mean_of_rows,
        tree_agg_func=np.mean,
        source_agg_func=mean_of_rows,
        max_num_samples=1e6,
        verbose=False,
):
    """Wire up the embedding, the embedded dataset and the embedded class tree.

    ``embedding``, ``dataset`` and ``tree`` may each be given either as a
    ready-made object (``Embedding``, ``EmbeddedDataset``,
    ``EmbeddedClassTree``) or as a path from which that object is built.

    Args:
        dataset: ``EmbeddedDataset`` instance or dataset path.
        columns: forwarded to ``EmbeddedDataset`` when it is built here.
        tree: ``EmbeddedClassTree`` instance or tree path.
        embedding: ``Embedding`` instance or embedding path.
        row_agg_func, tree_agg_func, source_agg_func: stored aggregation
            callables.
        max_num_samples: stored as ``self.max_num_samples``.
        verbose: enables ``self.vprint`` and is forwarded to built objects.
    """
    # Printing helper that is a no-op unless verbose is set.
    self.vprint = print if verbose else no_op
    self.max_num_samples = max_num_samples

    if not isinstance(embedding, Embedding):
        embedding = Embedding(embedding_path=embedding, verbose=verbose)
    self.embedding = embedding

    if not isinstance(dataset, EmbeddedDataset):
        dataset = EmbeddedDataset(self.embedding,
                                  columns=columns,
                                  dataset_path=dataset,
                                  verbose=verbose)
    self.dataset = dataset

    if not isinstance(tree, EmbeddedClassTree):
        tree = EmbeddedClassTree(self.embedding,
                                 tree_path=tree,
                                 verbose=verbose)
    self.tree = tree

    self.row_agg_func = row_agg_func
    self.source_agg_func = source_agg_func
    self.tree_agg_func = tree_agg_func
    self.similarity_matrices = {}
def _create_embeddings(self):
    """Create one ``Embedding`` per entry of ``self.table_sizes``.

    Each layer uses the table size as ``input_dim``,
    ``self.local_embedding_dim`` as ``output_dim`` and
    ``self.embedding_trainable`` as ``trainable``. The layers are collected,
    in order, in ``self.embedding_layers``.
    """
    layers = self.embedding_layers = []
    for size in self.table_sizes:
        layers.append(
            Embedding(input_dim=size,
                      output_dim=self.local_embedding_dim,
                      trainable=self.embedding_trainable))
def __init__(self, vocab_sizes, embedding_dims, merge_methods,
             padding_indices, fix_embedding, out_method='none',
             out_dim=None):
    """Create one embedding per feature and, optionally, an output projection.

    Args:
        vocab_sizes: per-feature vocabulary sizes.
        embedding_dims: per-feature embedding widths.
        merge_methods: per-feature merge method; widths of the features
            marked ``'cat'`` are summed into the embedding output width.
        padding_indices: per-feature padding index.
        fix_embedding: forwarded to every ``Embedding``.
        out_method: ``'none'`` keeps the embedding output width;
            ``'linear'`` adds an ``nn.Linear``; any other value adds an
            ``MLP`` with a hidden layer of half the embedding output width.
        out_dim: output width for the ``'linear'`` and ``MLP`` cases.
    """
    super(MultiFeatureEmbedding, self).__init__()
    self._vocab_sizes = vocab_sizes
    self._embedding_dims = embedding_dims
    self._n_feature = len(vocab_sizes)
    self._merge_methods = merge_methods
    self._padding_indices = padding_indices
    self._fix_embedding = fix_embedding

    per_feature_embeddings = (
        Embedding(size, width, pad, fix_embedding)
        for size, width, pad in zip(vocab_sizes, embedding_dims,
                                    padding_indices))
    self.emb_list = nn.ModuleList(per_feature_embeddings)

    self._out_method = out_method
    concatenated_widths = [
        width for position, width in enumerate(embedding_dims)
        if merge_methods[position] == 'cat'
    ]
    self._emb_out_dim = sum(concatenated_widths)

    if out_method == 'none':
        # No projection: the output is the concatenated embedding itself.
        self._out_dim = self._emb_out_dim
        return

    self._out_dim = out_dim
    if out_method == 'linear':
        self.out_module = nn.Linear(self._emb_out_dim, self._out_dim)
    else:
        hidden_width = int(self._emb_out_dim / 2)
        self.out_module = MLP(self._emb_out_dim,
                              [hidden_width, self._out_dim],
                              ['prelu', 'prelu'])
def __init__(self, config, vocab):
    """Create the embedding, six banks of 1-D convolutions, dropouts and projections.

    Each convolution bank holds one ``nn.Conv1d`` per
    ``(kernel_size, output_channels)`` pair from ``config``, padded with
    ``(kernel_size - 1) // 2``. The QA/QR banks take ``config.q_seq_len``
    input channels, the CA/CR banks ``config.c_seq_len``, and the PQ/PC banks
    the sum of ``config.output_channels``.
    """
    super(Net, self).__init__()
    self.embed = Embedding(config, vocab)

    def conv_bank(in_channel):
        # One convolution per configured (kernel size, output channels) pair.
        bank = []
        for kernel, channels in zip(config.kernel_sizes,
                                    config.output_channels):
            bank.append(
                nn.Conv1d(in_channels=in_channel,
                          out_channels=channels,
                          kernel_size=kernel,
                          padding=((kernel - 1) // 2)))
        return nn.ModuleList(bank)

    full_size = sum(config.output_channels)

    self.convs_QA = conv_bank(config.q_seq_len)
    self.convs_QR = conv_bank(config.q_seq_len)
    self.convs_CA = conv_bank(config.c_seq_len)
    self.convs_CR = conv_bank(config.c_seq_len)
    self.convs_PQ = conv_bank(full_size)
    self.convs_PC = conv_bank(full_size)

    # One dropout per convolution bank, all with the configured probability.
    self.drop_QA = nn.Dropout(config.dropout)
    self.drop_QR = nn.Dropout(config.dropout)
    self.drop_CA = nn.Dropout(config.dropout)
    self.drop_CR = nn.Dropout(config.dropout)
    self.drop_PQ = nn.Dropout(config.dropout)
    self.drop_PC = nn.Dropout(config.dropout)

    self.proj1 = nn.Linear(full_size, full_size)
    self.proj2 = nn.Linear(full_size, 1)
def __init__(self, data_set: DataSet, ngram_size: int, emb_size: int,
             hid_size: int):
    """Set up the language recognition module.

    Args:
        data_set: dataset of (input, language) pairs; the feature alphabet
            and the set of output languages are derived from it
        ngram_size: size of the n-gram features (1 for unigrams, 2 for
            bigrams, and so on)
        emb_size: size of the embedding vectors
        hid_size: size of the hidden layer of the FFN used for scoring
    """
    # Remember the n-gram size.
    self.ngram_size = ngram_size

    # Embedding sub-module over the feature alphabet of the dataset.
    self.register("emb", Embedding(self.alphabet(data_set), emb_size))

    # Encoding: mapping between languages and ints.
    languages = {lang for (_, lang) in data_set}
    self.enc = Encoding(languages)

    # Scoring FFN sub-module with one output per language.
    self.register(
        "ffn", FFN(idim=emb_size, hdim=hid_size, odim=len(languages)))

    # Sanity check that every registered parameter requires gradients;
    # this is what exposes the "bug" in the embedding module.
    assert all(param.requires_grad is True for param in self.params())
def convert_to_word_embedding(rank, glove_h5, prefix, *inputs):
    """Convert the content column of each input CSV to embeddings stored in HDF5 files.

    For every row, the target file is chosen from the type returned by
    ``get_type_by_name`` for the row's path column. A dataset named after the
    row's id is created only if it does not exist yet. The work stops early
    once the interrupt handler reports an interruption.

    Args:
        rank: integer used only as a prefix in the progress messages.
        glove_h5: forwarded to ``Embedding``.
        prefix: forwarded to ``_plang_h5`` and used to build the file keys.
        *inputs: paths of the CSV files to process.
    """
    files = _plang_h5(prefix, driver='mpio', comm=MPI.COMM_WORLD)
    embedding = Embedding(glove_h5)
    # Rows for a file are skipped once its counter has exceeded this limit.
    MAX_COUNTER = 900000
    # Number of datasets created so far, per output file key.
    files_counter = {key: 0 for key in files.keys()}
    print('[%d] Handling:: %s' % (rank, inputs), flush=True)
    with GracefulInterruptHandler() as h:
        for counter, f in enumerate(inputs):
            with open(f) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    if h.interrupted:
                        break
                    plang_enum = get_type_by_name(row[PATH_COL])
                    key = prefix + plang_enum.name + '.h5'
                    h5 = files[key]
                    if files_counter[key] > MAX_COUNTER:
                        continue
                    # Progress message every 1000 created datasets.
                    if files_counter[key] % 1000 == 0:
                        print('[%d] %s:: %d' % (rank, key, files_counter[key]),
                              flush=True)
                    idx = '/%s' % row[ID_COL]
                    # Only rows whose id is not already stored are embedded.
                    if idx not in h5:
                        files_counter[key] += 1
                        content = row[CONTENT_COL]
                        content_embedding = embedding.words_to_embeddings(
                            content)
                        h5.create_dataset(idx,
                                          data=content_embedding,
                                          dtype=content_embedding.dtype)
            print('[%d] finished %s' % (rank, counter + 1), flush=True)
            # Propagate the interruption out of the per-file loop as well.
            if h.interrupted:
                break
    print('[%d] exiting' % rank, flush=True)
    archiver.close_multi(files)
def get_image_feature(img_path, img_list_path, model_path, epoch_num, gpu_id):
    """Compute an embedding and a faceness score for every listed image.

    Every line of the list file is split on single spaces. The first field
    is joined onto ``img_path`` to locate the image, the last field is kept
    as the faceness score, and the fields in between are parsed as floats and
    reshaped to ``(5, 2)`` before being passed to ``embedding.get``.

    Args:
        img_path: directory that the image names in the list are relative to.
        img_list_path: path of the text file listing the images.
        model_path: forwarded to ``Embedding``.
        epoch_num: forwarded to ``Embedding``.
        gpu_id: forwarded to ``Embedding``.

    Returns:
        A tuple ``(img_feats, faceness_scores)`` of ``float32`` numpy arrays.
    """
    # Read the whole list inside a context manager so the file handle is
    # released immediately (it was previously left open).
    with open(img_list_path) as img_list:
        files = img_list.readlines()
    embedding = Embedding(model_path, epoch_num, gpu_id)

    img_feats = []
    faceness_scores = []
    # The line index was never used, so the lines are iterated directly.
    for each_line in print_progress(files):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        img_feats.append(embedding.get(img, lmk))
        faceness_scores.append(name_lmk_score[-1])

    img_feats = np.array(img_feats).astype(np.float32)
    faceness_scores = np.array(faceness_scores).astype(np.float32)
    return img_feats, faceness_scores
def create_representation(rep_type, path, *args, **kwargs):
    """Build the representation named by ``rep_type`` from ``path``.

    ``'Explicit'`` and ``'PPMI'`` use ``Explicit.load``, ``'SVD'`` uses
    ``SVDEmbedding``, ``'GIGA'`` uses ``GigaEmbedding`` and any other truthy
    type uses ``Embedding.load``. Extra positional and keyword arguments are
    forwarded unchanged.

    Returns:
        The constructed representation, or ``None`` when ``rep_type`` is
        falsy.
    """
    if rep_type in ('Explicit', 'PPMI'):
        factory = Explicit.load
    elif rep_type == 'SVD':
        factory = SVDEmbedding
    elif rep_type == 'GIGA':
        factory = GigaEmbedding
    elif rep_type:
        factory = Embedding.load
    else:
        return None
    return factory(path, *args, **kwargs)
def simple_create_representation(rep_type, path, restricted_context=None,
                                 thresh=None, normalize=True):
    """Load an explicit PPMI representation or a plain embedding from ``path``.

    Args:
        rep_type: ``'PPMI'`` selects ``Explicit.load``; anything else selects
            ``Embedding.load``.
        path: location of the stored representation.
        restricted_context: forwarded to ``Explicit.load`` (PPMI only).
        thresh: forwarded to ``Explicit.load`` (PPMI only).
        normalize: forwarded to ``Explicit.load`` (PPMI only).

    Returns:
        The loaded representation.
    """
    if rep_type != 'PPMI':
        # NOTE(review): the second argument is always True here, so
        # `normalize` has no effect on this path - confirm this is intended.
        return Embedding.load(path, True)
    return Explicit.load(path,
                         normalize=normalize,
                         restricted_context=restricted_context,
                         thresh=thresh)
import sys import numpy as np import datetime sys.path.append('../SSH') sys.path.append('../alignment') from ssh_detector import SSHDetector from alignment import Alignment from embedding import Embedding #short_max = 800 scales = [1200, 1600] t = 2 detector = SSHDetector('../SSH/model/e2ef', 0) alignment = Alignment('../alignment/model/3d_I5', 12) embedding = Embedding('./model/model', 0) out_filename = './out.png' f = '../sample-images/t1.jpg' if len(sys.argv)>1: f = sys.argv[1] img = cv2.imread(f) im_shape = img.shape print(im_shape) target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) if im_size_min>target_size or im_size_max>max_size: im_scale = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: