def build_transformer(raw_input_values, with_linear_terms, other_terms):
    # Convert the raw inputs using the transformer functions
    transformer = Transformer(raw_input_values)
    if with_linear_terms:
        transformer.add_linear_terms()
    for name, fn in other_terms.iteritems():
        transformer.add_new_term(name, fn)
    return transformer
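# A minimal usage sketch for build_transformer, assuming the surrounding
# module's Transformer class is importable; the 'squared'/'log1p' term names
# and lambdas are hypothetical illustrations, not part of the original code.
# extra_terms = {'squared': lambda x: x ** 2, 'log1p': lambda x: math.log1p(x)}
# t = build_transformer(raw_input_values, with_linear_terms=True, other_terms=extra_terms)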
def get_blueprint_info(path, transform_str):
    """Returns information about the blueprint at path.

    If transform_str is given, the blueprint will be transformed accordingly
    before returning.
    """
    sheets = filereader.get_sheet_names(path)
    newphase, transforms, ztransforms = \
        transformer.parse_transform_str(transform_str)

    result = ''
    for sheet in sheets:
        try:
            (layers, details) = filereader.parse_file(path, sheet[1])

            # transform the blueprint
            if transforms is not None:
                logmsg('transform', 'Transforming with: %s' % transform_str)

                if newphase is not None:
                    details['build_type'] = buildconfig.get_full_build_type_name(newphase)

                # do the x/y transformations
                tran = Transformer(layers, details['start'])
                tran.transform(transforms)
                details['start'] = tran.start
                layers = tran.layers

                logmsg('transform', 'Results of transform:')
                loglines('transform', lambda: FileLayer.str_layers(layers))

            layers = FileLayers_to_GridLayers(layers)
            bp = Blueprint(sheet[0], layers, details)

            # perform any requested z-transforms
            if ztransforms is not None:
                layers = bp.repeat_ztransforms(ztransforms, bp.layers,
                                               Blueprint.repeater_layers)
                bp.layers = layers

            formatted = bp.get_info()

            # add this sheet's info to the result string
            result += '>>>> Sheet id %d\n' % sheet[1]
            result += formatted + '\n'
        except BlueprintError:
            continue  # ignore blank/missing sheets

    if result:
        return result
    raise BlueprintError("No valid blueprints found in '%s'." % path)
def __init__(self, **kwrds):
    self.learning = "SGD"
    self.eta = 0.01
    self.eta_decay = 0.9
    self.beta_decay = 0
    self.max_epoc = 10
    self.batch_size = 1
    self.eps = 1e-6
    self.weight_decay = 0
    self.init_beta = 0.01
    self.lcl_lbfgs = []
    self.lcltest_lbfgs = []
    self.rlcl_lbfgs = []
    self.rlcltest_lbfgs = []
    Transformer.__init__(self, **kwrds)
def append_ahead_issue(issue_id_path, collection, year):
    HERE = os.path.abspath(os.path.dirname(__file__))

    # data generator
    iter_data = TitleCollector(config.API_URL,
                               collection=collection,
                               username=config.API_USER,
                               api_key=config.API_KEY)

    # id file rendering
    transformer = Transformer(filename=os.path.join(HERE, "templates/issue_db_entry.txt"))
    string_nahead = transformer.transform_list(iter_data, year)

    # use a context manager so the appended file handle is closed promptly
    with open(issue_id_path, "a") as f:
        f.write(string_nahead.encode("ISO-8859-1"))
    return issue_id_path
def __init__(self, platformName, tracedir, pattern="*.json", appDomains=None):
    '''Constructor'''
    self.platformName = platformName
    self.tracedir = tracedir
    self.pattern = pattern
    # avoid the shared mutable default-argument pitfall
    self.appDomains = appDomains if appDomains is not None else []
    self.traces = {}
    self.keywords = {}

    # Clustering Nodes
    self.allReq = []    # flattened trace used by new approach
    self.allItems = []  # flattened trace
    self.allSeq = []
    self.cluster = []
    self.clusterLabels = {}
    self.t = Transformer()

    # Distances
    self.dist = {}
    self.THRESH = utils.getDefaultThresh()

    # Graph fields
    self.nodes = []
    self.edges = []

    # Load Traces and Extract Keywords
    self.loadTraces()
def __init__(self, yaml=None):
    self.tf = Transformer()
    moveit_commander.roscpp_initialize(sys.argv)
    self.__arm_group = moveit_commander.MoveGroupCommander("arm")
    self.__arm_group.set_planning_time(5)
    self.__gripper_group = moveit_commander.MoveGroupCommander("gripper")
    self.__base_group = moveit_commander.MoveGroupCommander("base")
    self.__base_group.set_planning_time(2.5)
    self.__arm_base_group = moveit_commander.MoveGroupCommander("arm_base")
    self.__arm_base_group.set_planning_time(10)
    rospy.wait_for_service('/plan_kinematic_path')
    self.__plan_service = rospy.ServiceProxy('/plan_kinematic_path', GetMotionPlan)
    self.__planning_scene_interface = PlanningSceneInterface()
    euroc_interface_node = '/euroc_interface_node/'
    self.__set_object_load_srv = rospy.ServiceProxy(
        euroc_interface_node + 'set_object_load', SetObjectLoad)
    self.__manService = ManipulationService()
    rospy.sleep(1)
    self.__planning_scene_interface.add_yaml(yaml)
    self.__grasp = None
    self.__collision_object_buffer = []
    rospy.loginfo("Manipulation started.")
def test_removing_styles_is_equal_to_original_line(self):
    """Style a line, remove escape sequences and compare to the original."""
    # raw strings keep the regex backslashes from being treated as escapes
    styles = [
        RegexStyle(r"http:[\w+|/+|:]+", ["red"]),
        RegexStyle(r"^\w\w\w \d\d\s?", ['white', 'on-magenta']),
        RegexStyle(r"\d\d:\d\d:\d\d", ['bold', 'on-blue']),
        RegexStyle(r".*<warn>.*", ['yellow']),
        RegexStyle(r"\((.*)\)", ['red', 'on-white']),
        RegexStyle(r"\[(.*)\]", ['grey', 'bold']),
    ]
    transformer = Transformer(styles)
    lines = self.get_lines('testdata/test-log')
    for original_line in lines:
        original_line = original_line.strip('\n')
        styled_line = transformer.style(original_line)
        styled_line = styled_line.encode('string_escape')
        unstyled_line = self.remove_styles(styled_line)
        self.assertEqual(original_line, unstyled_line)
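# A minimal sketch of what a remove_styles helper typically does: strip ANSI
# CSI color sequences with a regex. The helper name and the exact escape
# format are assumptions here, not taken from the test's actual implementation.
import re

ANSI_CSI = re.compile(r'\x1b\[[0-9;]*m')

def remove_styles(styled_line):
    # Drop every ESC[...m color/style sequence, leaving the raw text.
    return ANSI_CSI.sub('', styled_line)

assert remove_styles('\x1b[31mred text\x1b[0m') == 'red text'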
def finish(cls):
    dv.fit(all_players)
    for (g, y, w) in all_games:
        this_games_players = list()
        for p in g.players:
            this_games_players.append({'name': p.__dict__["name"]})
        rows = dv.transform(this_games_players)
        result_array = None
        for row in rows:
            # 'is None' instead of '== None': sparse rows don't support == None
            if result_array is None:
                result_array = row
            else:
                result_array = result_array + row
        features.append(result_array.toarray()[0])
        labels_home.append(g.score_home)
        labels_away.append(g.score_away)

    pca.fit(features)

    for (home, away, week, year) in future_games:
        rows = dv.transform(last_known_players[home] + last_known_players[away])
        result_array = None
        for row in rows:
            if result_array is None:
                result_array = row
            else:
                result_array = result_array + row
        futures_home.append({
            'name': home,
            'features': pca.transform(result_array.toarray()[0])
        })
        futures_away.append({
            'name': away,
            'features': pca.transform(result_array.toarray()[0])
        })

    train_home = (np.array(pca.transform(features)), np.array(labels_home))
    train_away = (np.array(pca.transform(features)), np.array(labels_away))

    dir_name = os.path.dirname(Transformer.get_pickle_filename(cls.__name__))
    train_home_name = os.path.join(dir_name, "train_home.pickle.gz")
    train_away_name = os.path.join(dir_name, "train_away.pickle.gz")
    future_home_name = os.path.join(dir_name, "futures_home.pickle.gz")
    future_away_name = os.path.join(dir_name, "futures_away.pickle.gz")
    cPickle.dump(train_home, gzip.open(train_home_name, 'wb'), cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(train_away, gzip.open(train_away_name, 'wb'), cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(futures_home, gzip.open(future_home_name, 'wb'), cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(futures_away, gzip.open(future_away_name, 'wb'), cPickle.HIGHEST_PROTOCOL)
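# The row-accumulation loops above just sum the one-hot rows that
# DictVectorizer.transform returns; a sketch of the same reduction done in one
# call on the sparse matrix (scipy's .sum(axis=0) yields a dense 1 x n matrix):
from sklearn.feature_extraction import DictVectorizer
import numpy as np

dv2 = DictVectorizer()
rows = dv2.fit_transform([{'name': 'a'}, {'name': 'b'}, {'name': 'a'}])
summed = np.asarray(rows.sum(axis=0)).ravel()  # counts per feature column
print(dict(zip(dv2.get_feature_names_out(), summed)))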
def main_system(profile=0):
    clbr = run_calibration()
    print "*******", clbr.conf_h, clbr.conf_yv, clbr.thr, clbr.light, "*******"
    trf = Transformer(clbr.light, clbr.conf_h, clbr.conf_yv, clbr.thr)
    trf.turn_on_bayes_classifier(clbr.pdf_cmp_h, clbr.pdf_cmp_v)
    track = StateTracker()
    while True:
        f = read_camera()
        move_cue = trf.move_cue(f)
        skin_cue = trf.skin_classifier(f)
        skin_cue = trf.clean_whole_image(skin_cue)
        final = cv2.bitwise_and(skin_cue, move_cue)
        track.update(final)
        info = track.follow(f)
        cv2.imshow('IMG', f)
        cv2.imshow('SKIN FINAL', final)
        k = cv2.waitKey(20)
        if k == 27:
            break
        # debug & profile part
        if profile > 0:
            profile -= 1
            if profile == 0:
                break
#print("bounds") #print(upper_confidence_bound(np.asarray([[0.00617284, 0.48765432]]))) min_val = scipydirect.minimize(neg_upper_confidence_bound,bounds) xval = min_val.x acc_targets = multi_fid_values['accuracy_targets']+[0.0] out_fid_level = num_fidelities-1# defaults to highest fidelity function for fid_level,(acc,reg) in enumerate(zip(acc_targets,regressors)): mean,stdev = reg.predict([min_val.x], return_std=True) if stdev*beta > acc: out_fid_level = fid_level break yval = -neg_upper_confidence_bound([xval]) return xval,yval,out_fid_level if __name__ == "__main__": assert len(sys.argv) == 2 , "needs one parameter, the data filename." data = json.load(open(sys.argv[1])) trans = Transformer(data) #ys,xs = parse_data(data,trans) #bounds = trans.get_bounds() #print(xs) #print(ys) #print(bounds) res = next_point(data,trans) print(res) inv_res = trans.inverse_point(res[0]) print(inv_res)
if config.run_tensorboard:
    from input_path import train_summary_writer, valid_summary_writer
else:
    train_summary_writer = None
    valid_summary_writer = None

#tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(file_path.subword_vocab_path)
train_dataset, val_dataset = create_train_data()
train_loss, train_accuracy = get_loss_and_accuracy()
validation_loss, validation_accuracy = get_loss_and_accuracy()

transformer = Transformer(num_layers=config.num_layers,
                          d_model=config.d_model,
                          num_heads=config.num_heads,
                          dff=config.dff,
                          input_vocab_size=config.input_vocab_size,
                          target_vocab_size=config.target_vocab_size,
                          rate=config.dropout_rate)
generator = Generator()

# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.
train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None), dtype=tf.int32),
def evaluate_transformer():
    tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path, tag_new_tok + "tokenizer_en_" + str(DICT_SIZE)))
    tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path, tag_new_tok + "tokenizer_de_" + str(DICT_SIZE)))
    input_vocab_size = tokenizer_en.vocab_size + 2
    target_vocab_size = tokenizer_de.vocab_size + 2

    # using transformer2 as eng -> de
    transformer2 = Transformer(num_layers, d_model, num_heads, dff,
                               input_vocab_size, target_vocab_size,
                               pe_input=input_vocab_size,
                               pe_target=target_vocab_size,
                               rate=dropout_rate)
    ckpt = tf.train.Checkpoint(transformer2=transformer2)
    ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()
    print('Latest checkpoint restored!!')

    # loading a different part of the training set for backtranslation
    # (the part before :TRAIN_ON was used for training)
    train_on_end = TRAIN_ON + train_backtrans_on
    split = tfds.Split.TRAIN.subsplit(tfds.percent[TRAIN_ON:train_on_end])
    print('Split is: {}'.format(split))
    examples, metadata = tfds.load('wmt14_translate/de-en',
                                   data_dir=data_path,
                                   with_info=True,
                                   as_supervised=True,
                                   split=split)

    def filter_max_length(x, y, max_length=MAX_LENGTH):
        """Restrict the used sequences x and y to length <= max_length."""
        return tf.logical_and(tf.size(x) <= max_length,
                              tf.size(y) <= max_length)

    examples = examples.filter(filter_max_length)
    train_examples4backtrans = examples
    print('type of train_examples4backtrans: {}'.format(
        type(train_examples4backtrans)))
    print('shape of train_examples4backtrans: {}'.format(
        tf.data.experimental.cardinality(train_examples4backtrans)))
    dataset_length = [i for i, _ in enumerate(train_examples4backtrans)][-1] + 1

    def predict(inp_sentence):
        start_token = [tokenizer_en.vocab_size]
        end_token = [tokenizer_en.vocab_size + 1]

        # inp_sentence is ENGLISH, hence adding the start and end token
        inp_sentence = start_token + tokenizer_en.encode(inp_sentence) + end_token
        encoder_input = tf.expand_dims(inp_sentence, 0)

        # as the target is GERMAN, the first word fed to the transformer
        # should be the german start token
        decoder_input = [tokenizer_de.vocab_size]
        output = tf.expand_dims(decoder_input, 0)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        def symbols_to_logits(output):
            batched_input = tf.tile(encoder_input, [beam_width, 1])
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                batched_input, output)
            predictions, attention_weights = transformer2(
                batched_input, output, False,
                enc_padding_mask, combined_mask, dec_padding_mask)
            predictions = predictions[:, -1, :]
            return predictions

        finished_seq, finished_scores, states = beam_search(
            symbols_to_logits,
            output,
            beam_width,
            MAX_LENGTH,
            target_vocab_size,
            alpha,
            states=None,
            eos_id=tokenizer_de.vocab_size + 1,
            stop_early=True,
            use_tpu=False,
            use_top_k_with_unique=True)
        return finished_seq[0, 0, :]

    def translate(sentence):
        result = predict(sentence)
        predicted_sentence = tokenizer_de.decode(
            [i for i in result if i < tokenizer_de.vocab_size])
        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))
        return predicted_sentence

    translations = []
    inputs = []
    targets = []
    BLEUs = []
    i = 0
    for sentence in train_examples4backtrans:
        # eng -> deu: hence indexes reversed
        inp = sentence[1].numpy().decode('utf-8')
        target = sentence[0].numpy().decode('utf-8')
        translation = translate(inp)
        BLEU = nltk.translate.bleu_score.sentence_bleu(
            [nltk.word_tokenize(target)], nltk.word_tokenize(translation))
        translations.append(translation)
        inputs.append(inp)
        BLEUs.append(BLEU)
        print('Average BLEU score: ', 100 * np.mean(BLEUs))
        targets.append(target)
        # i += 1
        # store backtranslations every 800 sentences
        # if i % 800 == 0:
        #     d = {'input': inputs, 'target': targets,
        #          'translation': translations, 'BLEU': BLEUs}
        #     df = pd.DataFrame.from_dict(d)
        #     df.to_csv(os.path.join(output_path, 'results_backtrans_' +
        #                            experiment_name + '_interm_' + str(i) + '.csv'))

    d = {
        'input': inputs,
        'target': targets,
        'translation': translations,
        'BLEU': BLEUs
    }
    df = pd.DataFrame.from_dict(d)
    df.to_csv(os.path.join(output_path,
                           'results_backtrans_' + experiment_name + '.csv'))
    print('Average BLEU score: ', 100 * np.mean(BLEUs))
def main():
    # Command line arguments
    args = get_args()
    # data
    vocab, train_data, valid_data = load_data()
    # Model
    model = Transformer(args.n_layers, args.embed_dim, args.hidden_dim,
                        args.n_heads, vocab, args.dropout)
    if args.cuda:
        model = model.cuda()
    # Load existing model
    if os.path.isfile(args.model_file) and not args.overwrite_model:
        model.load_state_dict(th.load(args.model_file))
    # Optimizer
    optim = th.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98))
    # Learning rate schedule
    lr_schedule = inverse_sqrt_schedule(2000, args.lr)
    # Dataloader
    train_loader = MTDataLoader(train_data,
                                max_bsz=args.samples_per_batch,
                                max_tokens=args.tokens_per_batch,
                                shuffle=True)
    valid_loader = MTDataLoader(valid_data,
                                max_bsz=args.samples_per_batch,
                                max_tokens=args.tokens_per_batch,
                                shuffle=False)
    # Either validate
    if args.validate_only:
        valid_ppl = evaluate_ppl(model, valid_loader)
        print(f"Validation perplexity: {valid_ppl:.2f}")
    else:
        # Train epochs
        best_ppl = 1e12
        for epoch in range(1, args.n_epochs + 1):
            print(f"----- Epoch {epoch} -----")
            # Train for one epoch
            model.train()
            train_epoch(model, optim, train_loader, lr_schedule, args.clip_grad)
            # Check dev ppl
            model.eval()
            valid_ppl = evaluate_ppl(model, valid_loader)
            print(f"Validation perplexity: {valid_ppl:.2f}")
            # Early stopping maybe
            if valid_ppl < best_ppl:
                best_ppl = valid_ppl
                print(f"Saving new best model (epoch {epoch} ppl {valid_ppl})")
                th.save(model.state_dict(), args.model_file)
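# A sketch of what an inverse_sqrt_schedule helper usually computes: linear
# warmup for the first `warmup` steps, then decay proportional to 1/sqrt(step).
# The closure form below is an assumption about this repo's helper, not its
# actual implementation.
def inverse_sqrt_schedule(warmup, lr):
    def schedule(step):
        step = max(step, 1)
        # min() picks the warmup ramp before `warmup` steps, the decay after
        scale = min(step / warmup, (warmup / step) ** 0.5)
        return lr * scale
    return schedule

sched = inverse_sqrt_schedule(2000, 5e-4)
print(sched(1), sched(2000), sched(8000))  # ramps up, peaks at warmup, decays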
import os, sys
import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

itokens, otokens = dd.MakeS2SDict('data/pinyin.corpus.txt',
                                  dict_file='data/pinyin_word.txt')
print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())

from transformer import Transformer, LRSchedulerPerStep

d_model = 256
s2s = Transformer(itokens, otokens, len_limit=500, d_model=d_model,
                  d_inner_hid=1024, n_head=4, layers=3, dropout=0.1)

mfile = 'models/pinyin.model.h5'

lr_scheduler = LRSchedulerPerStep(d_model, 4000)
model_saver = ModelCheckpoint(mfile, monitor='ppl',
                              save_best_only=True, save_weights_only=True)

#s2s.model.summary()
opt = Adam(0.001, 0.9, 0.98, epsilon=1e-9)
s2s.compile(opt)

try:
    s2s.model.load_weights(mfile)
except:
    print('\n\nnew model')
def finish(cls):
    df = pd.DataFrame.from_dict(rows)
    df.index.name = "index"
    df.to_csv(Transformer.get_csv_filename(cls.__name__))
    df.to_pickle(Transformer.get_pickle_filename(cls.__name__))
def collect(self):
    b = Browser()
    for i in xrange(100):
        log.info('LOAD PAGE WITH CAPTCHA')
        b.get('http://exchangecity.ru/?cmd=bonus')
        captcha = 'http://exchangecity.ru/include/anti_robot.php'
        b.save(captcha, CAPTCHA_PATH + Implem.name + '/%02d.png' % i)

        t = Transformer()
        t.load('orig', b.image(captcha))
        t.resizeby('resize', t['orig'], 2, 2)
        t.grayscale('grayscale', t['resize'], 2)
        t.binarize('binarize', t['grayscale'], 200, CV_THRESH_BINARY_INV)
        '''
        radius = 3
        kernel = cvCreateStructuringElementEx(radius * 2 + 1, radius * 2 + 1,
                                              radius, radius, CV_SHAPE_ELLIPSE)
        t.morphology('morphology', t['binarize'], 1, 1, kernel)
        '''
        t.contourSplit('breaksplit', t['binarize'], 0.001)

        if len(t.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        t.normolize('origsplit', 'breaksplit', Implem.size)
        t.savesymbols('origsplit', SYMBOLS_PATH + Implem.name, '%02d' % i)
        del t
def __init__(self, d, m):
    self.platform1 = d
    self.platform2 = m
    self.t = Transformer()
def preparing(b, t, a, l, d):
    b.show()
    for i in xrange(30):
        b.get('http://wmstream.ru/')
        captcha = b.js('$("#wmbonus_form_captcha img")[0].src')
        b.save(captcha, '/home/polzuka/inspirado/captcha/picture%02d' % i)

        t = Transformer()
        t.load('orig', b.image(captcha))
        #t.save(t['orig'], '/home/polzuka/inspirado/captcha/picture%02d' % i)
        t.resizeby('resize', t['orig'], 4, 4)
        t.grayscale('grayscale', t['resize'], 2)
        t.binarize('binarize', t['grayscale'], 200, CV_THRESH_BINARY)

        radius = 3
        kernel = cvCreateStructuringElementEx(radius * 2 + 1, radius * 2 + 1,
                                              radius, radius, CV_SHAPE_ELLIPSE)
        t.morphology('morphology', t['binarize'], 1, 1, kernel)

        try:
            t.breakSplit('breaksplit', t['morphology'], 0.2)
        except TransformError:
            print 'ololo'
            continue

        t.normolize('origsplit', 'breaksplit', 20, 30)
        #t.show()
        t.saveSymbols('origsplit', '/home/polzuka/inspirado/symbols', '%02d' % i)
        del t
def assert_styled_line(self, styles, input_line, expected_output_line):
    transformer = Transformer(styles)
    actual_output_line = transformer.style(input_line)
    self.assertEqual(expected_output_line, actual_output_line)
descriptions = df['description'].tolist()
FT = FilteredTokenizer()
Tokens = FT.filter_and_tokenize(descriptions,
                                mode=TOKEN_FILTERS,
                                tokenizer=TOKENIZER,
                                filter_fpath=CUSTOM_FILTER_PATH)

WordEmbedding_ = WordEmbedding()
WordEmbedding_.load()

print("====== Examples of things you can do with the embeddings =======")
print(WordEmbedding_.word_vectors.most_similar(positive=['woman', 'king'], negative=['man']))
print(WordEmbedding_.word_vectors.most_similar("dont"))
print(WordEmbedding_.word_vectors.most_similar("a"))

matched_tokens, unmatched_tokens = WordEmbedding_.check_embedding_coverage(
    list_tokens=Tokens, verbose=True)
# This writes a file named <embedding file name> + <date time> + unmatched tokens:
# the distinct unmatched tokens with counts, sorted in descending order.
# Afterwards you can inspect these attributes:
print("WordEmbedding_.coverage", WordEmbedding_.coverage)
# print("WordEmbedding_.wordvec_map", WordEmbedding_.wordvec_map)
print("You can get a word vector of the word 'hello' by calling: "
      "WordEmbedding_.word_vectors.get_vector('hello')",
      WordEmbedding_.word_vectors.get_vector('hello'))

T = Transformer(WordEmbedding_.wordvec_map)
# Converts the points values (scores) into one-hot vectors over the category
# intervals we define; the settings live in config.
y = df['points'].tolist()
X, y = T.fit_transform(Tokens, y,
                       drop_long_sentences=DROP_LONG_SENTENCES,
                       drop_short_sentences=DROP_SHORT_SENTENCES,
                       num2cat_=CONVERT_Y,
                       intervals=Y_CAT_INTERVALS)
print("X.shape, y.shape ", X.shape, y.shape)
class ViTNet(nn.Module):
    def __init__(self, num_classes=1, dim=32, num_tokens=8**3, token_c=64,
                 mlp_dim=128, heads=8, depth=1, dropout=0.1):
        super(ViTNet, self).__init__()
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) (d p3) -> b (h w d) (p1 p2 p3 c)',
                      p1=8, p2=8, p3=8),
            Linear(8**3, token_c),
        )
        self.pos_embedding = nn.Parameter(
            torch.empty(1, (num_tokens + 1), token_c))
        torch.nn.init.normal_(self.pos_embedding, std=.02)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, token_c))
        self.dropout = Dropout(dropout)

        self.transformer = Transformer(token_c, depth, heads, mlp_dim, dropout)
        self.to_cls_token = nn.Identity()

        # output
        self.nn1 = Linear(64, 1)
        self.act1 = ReLU()
        torch.nn.init.xavier_uniform_(self.nn1.weight)
        torch.nn.init.normal_(self.nn1.bias, std=1e-6)
        self.nn2 = Linear(8, num_classes)
        torch.nn.init.xavier_uniform_(self.nn2.weight)
        torch.nn.init.normal_(self.nn2.bias, std=1e-6)

    def forward(self, x, mask=None):
        T = self.to_patch_embedding(x)
        cls_tokens = self.cls_token.expand(T.shape[0], -1, -1)
        x = torch.cat((cls_tokens, T), dim=1)
        x += self.pos_embedding
        T = self.dropout(x)
        x = self.transformer(T)
        x = self.to_cls_token(x[:, 0])
        # x = self.act1(self.nn1(x))
        x = self.nn1(x)
        return x

    def relprop(self, cam=None, method="transformer_attribution",
                is_ablation=False, start_layer=0, **kwargs):
        cam = self.nn1.relprop(cam, **kwargs)
        cam = self.transformer.relprop(cam, **kwargs)
        if method == "transformer_attribution" or method == "grad":
            cams = []
            grad = self.transformer.attn.get_attn_gradients()
            cam = self.transformer.attn.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
            cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cams.append(cam.unsqueeze(0))
            rollout = compute_rollout_attention(cams, start_layer=start_layer)
            cam = rollout[:, 0, 1:]
            return cam
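# A quick shape sanity check for the 3-D patch embedding above, assuming a
# single-channel 64x64x64 volume (so 8x8x8 patches give 8**3 = 512 tokens of
# size 8**3 * 1 = 512 before the Linear projection to token_c = 64):
import torch
from torch.nn import Linear, Sequential
from einops.layers.torch import Rearrange

embed = Sequential(
    Rearrange('b c (h p1) (w p2) (d p3) -> b (h w d) (p1 p2 p3 c)',
              p1=8, p2=8, p3=8),
    Linear(8**3, 64),
)
tokens = embed(torch.randn(2, 1, 64, 64, 64))
print(tokens.shape)  # torch.Size([2, 512, 64])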
#
import numpy as np
import cv2
import pickle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from moviepy.editor import VideoFileClip
from scipy.ndimage.measurements import label

from vehicle_classifier import VehicleClassifier
from transformer import Transformer
from lanefinder import LaneFinder

# Definition of the global Transformer and global LaneFinder used in the
# process-image function
globalTransformer = Transformer()
globalLaneFinder = LaneFinder()

left_fit_buffer = None
right_fit_buffer = None
heatmap_history = []

'''
global left_fit_buffer
global right_fit_buffer

undistorted = globalTransformer.undistort(image)
warped = globalTransformer.warp(undistorted)
masked = globalTransformer.color_grad_threshold(warped, sobel_kernel=9,
                                                thresh_x=(20, 100),
                                                thresh_c=(120, 255))

left, right = globalLaneFinder.find_peaks(masked)
left_fit, right_fit, leftx, lefty, rightx, righty = \
    globalLaneFinder.sliding_window(masked, left, right)

# take an average of previous frames to smooth the detection
if left_fit_buffer is None:
print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())
print('train shapes:', Xtrain.shape, Ytrain.shape)
print('valid shapes:', Xvalid.shape, Yvalid.shape)

'''
from rnn_s2s import RNNSeq2Seq
s2s = RNNSeq2Seq(itokens, otokens, 256)
s2s.compile('rmsprop')
s2s.model.fit([Xtrain, Ytrain], None, batch_size=64, epochs=30,
              validation_data=([Xvalid, Yvalid], None))
'''

from transformer import Transformer, LRSchedulerPerStep

d_model = 256
s2s = Transformer(itokens, otokens, len_limit=70, d_model=d_model,
                  d_inner_hid=512, n_head=8, layers=2, dropout=0.1)

mfile = 'models/en2de.model.h5'

lr_scheduler = LRSchedulerPerStep(d_model, 4000)
model_saver = ModelCheckpoint(mfile, save_best_only=True, save_weights_only=True)

s2s.compile(Adam(0.001, 0.9, 0.98, epsilon=1e-9))

try:
    s2s.model.load_weights(mfile)
except:
    print('\n\nnew model')

if 'eval' in sys.argv:
def pickup(self):
    # create the browser we will use to crawl the bonus site
    b = Browser()
    # create the analyzer we will use to recognize the captcha
    a = Analyzer(self.site, self.symsize, self.charset)
    a.load()
    b.show()

    while True:
        log.debug('LOADING PAGE WITH WM BONUS')
        b.get('http://exchangecity.ru/?cmd=bonus')

        log.debug('SAVING CAPTCHA')
        captcha = 'http://exchangecity.ru/include/anti_robot.php'
        #b.save(captcha, '/home/polzuka/inspirado/captcha/wmtake/%02d.gif' % i)

        log.debug('CAPTCHA TRANSFORMING')
        t = Transformer('orig', b.image(captcha))
        t.resizeby('resize', t['orig'], 2, 2)
        t.grayscale('grayscale', t['resize'], 2)
        t.binarize('binarize', t['grayscale'], 200, CV_THRESH_BINARY_INV)
        t.contourSplit('breaksplit', t['binarize'], 0.001)

        if len(t.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        t.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = t.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = a.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del t

        print code
        log.debug('FILLING FIELDS')
        b.js("$('input[name = img]').val('%s')" % code)
        b.js("$('input[name = WALLET_BONUS]').val('R%s')" % self.purse)
        b.js("$('input[name = get_bonus]').click()")
        b.sleep(1)

        # the selector text matches the Russian page content
        # ("you received a bonus of ...") and must stay as-is
        if not b.js("$('font.textbl:contains(Вы получили бонус в размере)').length"):
            log.debug('FINISH')
            break
        log.debug('INCORRECT CAPTCHA RECOGNITION')

    self.quit()
def collect(self):
    b = Browser()
    for i in xrange(200):
        log.info('LOAD PAGE WITH CAPTCHA')
        b.get('http://sms-webmoney.ru/')
        captcha = 'http://sms-webmoney.ru/img.php'
        b.save(captcha, CAPTCHA_PATH + self.site + '/%02d.png' % i)

        t = Transformer()
        t.load('orig', b.image(captcha))
        t.resizeby('resize', t['orig'], 3, 3)
        t.grayscale('grayscale', t['resize'], 2)
        t.binarize('binarize', t['grayscale'], 200, CV_THRESH_BINARY_INV)

        radius = 2
        kernel = cvCreateStructuringElementEx(radius * 2 + 1, radius * 2 + 1,
                                              radius, radius, CV_SHAPE_ELLIPSE)
        t.morphology('morphology', t['binarize'], 0, 1, kernel)
        t.contourSplit('breaksplit', t['morphology'], 0.01)

        if len(t.symbols) != self.symqty:
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            continue

        t.normolize('origsplit', 'breaksplit', self.symsize)
        t.savesymbols('origsplit', SYMBOLS_PATH + self.site, '%02d' % i)
        del t
def pickup(self):
    # create the browser we will use to crawl wmtake.ru
    b = Browser()
    # create the analyzer we will use to recognize the captcha
    a = Analyzer(self.site, self.symsize, self.charset)
    a.load()
    b.show()

    log.debug('LOADING PAGE WITH WM BONUS')
    b.get('http://wmtake.ru/m.base/bonus.php')

    while True:
        log.debug('SAVING CAPTCHA')
        captcha = b.js('$("#scode-pic img")[0].src')
        #b.save(captcha, '/home/polzuka/inspirado/captcha/wmtake/%02d.gif' % i)

        log.debug('CAPTCHA TRANSFORMING')
        try:
            t = Transformer('orig', b.image(captcha))
            t.resizeby('resize', t['orig'], 2, 2)
            t.grayscale('grayscale', t['resize'], 2)
            t.binarize('binarize', t['grayscale'], 150, CV_THRESH_BINARY_INV)
            t.contourSplit('breaksplit', t['binarize'], 0.001)
            if len(t.symbols) != self.symqty:
                raise Exception
        except Exception, e:
            log.debug(e)
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            log.debug('LOADING PAGE WITH WM BONUS')
            b.get('http://wmtake.ru/m.base/bonus.php')
            continue

        t.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = t.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = a.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del t

        print code
        log.debug('FILLING FIELDS')
        b.js("$('#scode').val('%s')" % code)
        b.js("$('#purse').val('R%s')" % self.purse)
        b.js("$('div.news_box div.bn p').click()")
        b.sleep(10)

        if not b.js("$('#mess-exec:visible').length"):
            log.debug('FINISH')
            break
        log.debug('INCORRECT CAPTCHA RECOGNITION')
        log.debug('LOADING PAGE WITH WM BONUS')
        b.js("$('#mess-exec p').click()")
def __create_transformer_block(self, num_transformers, dropout=0.3):
    transformers = []
    for i in range(num_transformers):
        transformers.append(Transformer(dim_embedding=self.dim_embedding,
                                        num_heads=self.num_heads,
                                        dropout=dropout))
    return nn.Sequential(*transformers)
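# A minimal sketch of the stacking pattern above with a stand-in block, since
# the repo's Transformer layer isn't shown here; the stand-in only mimics the
# "same shape in, same shape out" contract that nn.Sequential relies on.
import torch
import torch.nn as nn

class DummyTransformer(nn.Module):
    def __init__(self, dim_embedding, num_heads, dropout):
        super().__init__()
        self.proj = nn.Linear(dim_embedding, dim_embedding)

    def forward(self, x):
        return self.proj(x)

block = nn.Sequential(*[DummyTransformer(64, 8, 0.3) for _ in range(4)])
print(block(torch.randn(2, 10, 64)).shape)  # torch.Size([2, 10, 64])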
class ActionRecognizer():
    def __init__(self, d, m):
        self.platform1 = d
        self.platform2 = m
        self.t = Transformer()

    def setConfig(self, conf):
        self.config = conf
        return self

    def run(self):
        self.clusterRequests()
        self.assignSymbols(self.platform1)
        self.mergeAndAssign(self.platform1, self.platform2)

    def clusterRequests(self):
        for p in [self.platform1, self.platform2]:
            logging.info("clustering platform " + p.platformName)

            # extract words
            for k, v in sorted(p.traces.iteritems()):
                traceInfo = []
                for action in v:
                    if 'ID' in action:
                        url = urlparse.urlparse(action['REQUEST'])
                        urlPath = self.doIgnore(url.path)
                        kws = self.getKeywords(url.query, action)
                        allKeywords = self.getJointKws((urlPath, kws))

                        rel_uri = url.path + '?' + url.query
                        action['URI'] = rel_uri
                        action['URL_PATH'] = urlPath + '?' + url.query
                        action['INFO'] = traceInfo
                        action['KEYWORDS'] = allKeywords
                        # traceInfo.append([urlPath, kws, action['ID'], allKeywords])
                        traceInfo.append([urlPath + '?' + url.query, kws,
                                          action['ID'], allKeywords])
                p.keywords[k] = traceInfo
                #print traceInfo

            # ### SIMPLE URL BASED CLUSTERING
            # if self.config["URL_CLUSTER"]:
            #     print "Simple URL based clustering for evaluation"
            #     mapCl = {}
            #     for k, v in sorted(p.keywords.iteritems()):
            #         for ti in v:
            #             url = ti[0]  # ti[4]
            #             if url in mapCl:
            #                 mapCl[url].append(ti)
            #             else:
            #                 mapCl[url] = [ti]
            #     urlcluster = []
            #     for actions in mapCl.values():
            #         urlcluster.append(set([a[2] for a in actions]))
            #     p.cluster = urlcluster
            #     continue

            # level 1 clustering - combine with same url path
            mapCl = {}
            for k, v in sorted(p.keywords.iteritems()):
                for ti in v:
                    urlPath = ti[0]
                    if urlPath in mapCl:
                        mapCl[urlPath].append(ti)
                    else:
                        mapCl[urlPath] = [ti]
            #print len(mapCl)
            #print mapCl.keys()

            l1clusters = []
            for actions in mapCl.values():
                l1clusters.append(set([a[2] for a in actions]))
            #print "L1 CLUSTERS::", len(l1clusters), ">>", l1clusters

            ### SIMPLE URL BASED CLUSTERING
            if self.config["URL_CLUSTER"]:
                print "Simple URL based clustering for evaluation"
                p.cluster = l1clusters
                continue

            # level 2 clustering
            #  - agglomerate (combine small clusters with similar url path)
            #  - divisive (split large clusters)
            smallClusters, largeClusters = [], []
            for k, v in mapCl.iteritems():
                if len(v) == 1:
                    smallClusters.append((k, v))
                if len(v) > 1:
                    largeClusters.append((k, v))

            sClusters = []
            sLen = len(smallClusters)
            #print "Small Clusters", smallClusters
            if sLen > 1:
                for i in range(sLen - 1):
                    for j in range(i + 1, sLen):
                        #print i, j, smallClusters[i][1], smallClusters[j][1]
                        cluster1 = smallClusters[i][1][0]  # get the first and only element
                        cluster2 = smallClusters[j][1][0]
                        a = cluster1[3]
                        b = cluster2[3]
                        if self.isSimilar(a, b):
                            #print "add mapping", (cluster1, cluster2)
                            utils.addMappingToList(sClusters, cluster1[2], cluster2[2])

            for sc in smallClusters:
                elem = sc[1][0][2]
                if not self.isPresent(sClusters, elem):
                    sClusters.append(set([elem]))
            #print len(sClusters), sClusters

            lClusters = []
            #print "Large Clusters::", len(largeClusters), ">>", largeClusters
            for lc in largeClusters:
                dist = self.getClusterDistance(lc[1])
                newCluster = self.doAgglomerativeClustering(lc[1], dist)
                lClusters.extend(newCluster)
            #print "new clusters::", len(lClusters), ">>", lClusters

            p.cluster = lClusters + sClusters
            # p.cluster = l1clusters

    def doAgglomerativeClustering(self, cl, dist):
        cluster = []
        n = len(dist)
        for i in range(0, n - 1):
            for j in range(i + 1, n):
                a = cl[i][2]
                b = cl[j][2]
                if dist[i, j] < T2:
                    # if a cluster already contains a
                    icl = self.isPresent(cluster, a)
                    if icl != None:
                        icl.add(b)
                        break
                    # if a cluster contains b (rare)
                    jcl = self.isPresent(cluster, b)
                    if jcl is not None:
                        jcl.add(b)
                    else:
                        cluster.append(set([a, b]))
            if not self.isPresent(cluster, a):
                cluster.append(set([a]))
        if not self.isPresent(cluster, cl[n - 1][2]):  # Edge case - last element
            cluster.append(set([cl[n - 1][2]]))
        #print " => Clusters:", cluster
        return cluster

    def isPresent(self, cluster, x):
        for cl in cluster:
            for c in cl:
                if c == x:
                    return cl
        return None

    def getClusterDistance(self, cl):
        n = len(cl)
        dist = numpy.zeros(shape=(n, n))
        for i in range(0, n):
            for j in range(0, n):
                dist[i, j] = utils.getJaccardDistance(cl[i][1], cl[j][1])
        return dist

    '''
    (url, kws) => [urlkws + kws]
    '''
    def getJointKws(self, tup):
        return self.getUrlPathKeywords(tup[0]) + tup[1]

    def isSimilar(self, a, b):
        if utils.getJaccardDistance(a, b) < T1:  # THRESH
            return True
        return False

    def assignSymbols(self, p):
        logging.info("assign symbols to platform " + p.platformName)
        cl = p.clusterLabels
        labels = utils.getUUChars()
        for c in p.cluster:
            l = labels[0]
            cl[l] = c
            labels.remove(l)
        #self.printClusterLabels(p)

    def printClusterLabels(self, p):
        cl = p.clusterLabels
        print "cluster lengths"
        for k in cl:
            print k, '\t', len(cl[k])
        print "cluster details"
        for k in cl:
            print k, ': (', len(cl[k]), ') ', cl[k]
            for t in cl[k]:
                req = p.allReq[t]
                print "\t", req['REQUEST'], req['KEYWORDS'],
                if 'post_data' in req:
                    print "POST:", req['post_data']
                else:
                    print

    def mergeAndAssign(self, d, m):
        logging.info("merge " + d.platformName + " clusters while assigning "
                     "corresponding symbols to " + m.platformName)
        mLabels = {}
        for file1, tr1 in sorted(d.traces.iteritems()):
            for file2, tr2 in sorted(m.traces.iteritems()):
                if self.config["URL_CLUSTER"]:
                    # Do simplistic URL based matching
                    mc = utils.getURLMatchSeqCount(tr1, tr2, d, m, mLabels)
                else:
                    mc = utils.getMatchSeqCount(tr1, tr2, d, m, mLabels, T1)

        cluster = m.cluster
        cl = m.clusterLabels
        dLabels = d.getLabels()
        chars = utils.getUUChars()
        charList = chars[len(dLabels):]

        for c in cluster:
            labels = set([])
            #print c, ":: ",
            for actionNum in c:
                if actionNum in mLabels:
                    labels = labels.union(mLabels[actionNum])
            #print "= Same cluster labels", labels

            label = None
            if len(labels) == 0:
                label = charList[0]
                charList.remove(label)
            else:
                sLabels = list(labels)
                sLabels.sort()
                label = sLabels[0]
                labels.remove(label)
                if len(labels) > 0:
                    print "- merging desktop labels", utils.printSet(sLabels)
                    # merge similar labels from other platform
                    for l in labels:
                        if l in dLabels and label in dLabels:
                            dLabels[label] = dLabels[label].union(dLabels[l])
                            del dLabels[l]

            if label in cl:
                cl[label].update(c)
            else:
                cl[label] = c

        # EVALUATION (RQ - step 1 effectiveness)
        print "%"*120, "\n\t START RQ1: (Action Recognition through Clustering) \n", "%"*120
        # print "Desktop URLs"
        # self.printOrderedReq(d.allReq)
        # print "-"*80
        print "Desktop Clusters"
        self.printClusterLabels(d)
        # print "Mobile URLs"
        # self.printOrderedReq(m.allReq)
        # print "-"*80
        print "Mobile Clusters"
        self.printClusterLabels(m)
        print "%"*120, "\n\t END RQ1: \n", "%"*120

    def printOrderedReq(self, req):
        requests = []
        for r in req:
            request = r['REQUEST']
            if 'post_data' in r:
                request = request + " POST:" + r['post_data']
            requests.append(request)
        requests = sorted(requests)
        for url in requests:
            print "\t-", url

    '''
    Utility functions for HTTP Request
    '''
    def getKeywords(self, query, action):
        qs = urlparse.parse_qs(query)
        qsw = []
        for qk in qs.keys():
            qsw.extend([qk, ",".join(qs[qk])])

        # Fix: split words with separators like / (TODO: Add more separators)
        nqsw = []
        for w in qsw:
            if '/' in w:
                nqsw.extend(w.split('/'))
            else:
                nqsw.append(w)
        qsw = nqsw

        postw = []
        if 'post_data' in action:
            post_data = urllib.unquote(action['post_data'])
            ps = urlparse.parse_qs(post_data)
            for pk in ps.keys():
                postw.extend([pk, ",".join(ps[pk])])

        # transform by applying map and filter
        return self.t.transform(qsw + postw)

    def getUrlPathKeywords(self, path):
        kws = re.compile('[\W]').split(path)
        return self.t.transform(kws)

    def doIgnore(self, path):
        prefixList = self.config["prefix"]
        if prefixList != None:
            prefixList.sort(key=len, reverse=True)
        if prefixList != None:
            for pf in prefixList:
                path = re.sub(pf, '', path)
        path = path.rstrip('/')
        return path

'''
    #plt.imshow(mask)
    #plt.show()

    if len(instance_masks) <= 0:
        pass

    kp = np.reshape(keypoints, (-1, config.NUM_KP, 3))
    instance_masks = np.stack(instance_masks).transpose((1, 2, 0))
    overlap_mask = instance_masks.sum(axis=-1) > 1
    seg_mask = np.logical_or(crowd_mask, np.sum(instance_masks, axis=-1))
    print(kp.shape)

    # Data Augmentation
    single_masks = [seg_mask, unannotated_mask, crowd_mask, overlap_mask]
    all_masks = np.concatenate(
        [np.stack(single_masks, axis=-1), instance_masks], axis=-1)
    aug = AugmentSelection.unrandom()
    img, all_masks, kp = Transformer.transform(img, all_masks, kp, aug=aug)

    num_instances = instance_masks.shape[-1]
    instance_masks = all_masks[:, :, -num_instances:]
    seg_mask, unannotated_mask, crowd_mask, overlap_mask = \
        all_masks[:, :, :4].transpose((2, 0, 1))
    seg_mask, unannotated_mask, crowd_mask, overlap_mask = [
        np.expand_dims(m, axis=-1)
        for m in [seg_mask, unannotated_mask, crowd_mask, overlap_mask]
    ]
    kp = [np.squeeze(k) for k in np.split(kp, kp.shape[0], axis=0)]

    kp_maps, short_offsets, mid_offsets, long_offsets = get_ground_truth(
        instance_masks, kp)
def main():
    train_data = SentenceDataset(args.train_file,
                                 encoding_type=args.encoding_type,
                                 filter_threshold=args.filter_threshold)
    val_data = SentenceDataset(args.val_file,
                               encoding_type=args.encoding_type,
                               filter_threshold=args.filter_threshold)

    train_loader = torch.utils.data.DataLoader(train_data, args.batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, args.batch_size)

    print(len(train_loader))

    input_dim = len(train_data.vocab.source_vocab)
    output_dim = len(train_data.vocab.target_vocab)
    static = args.embedding_type == 'static'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len, device, static)
    encoder_layer = EncoderLayer(args.hidden_dim, args.num_enc_heads,
                                 args.inner_dim, args.dropout)
    encoder = Encoder(enc_embedding, encoder_layer, args.num_enc_layers, args.dropout)

    dec_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len, device, static)
    decoder_layer = DecoderLayer(args.hidden_dim, args.num_dec_heads,
                                 args.inner_dim, args.dropout)
    decoder = Decoder(output_dim, args.hidden_dim, dec_embedding, decoder_layer,
                      args.num_dec_layers, args.dropout)

    pad_id = train_data.vocab.source_vocab['<pad>']
    model = Transformer(encoder, decoder, pad_id, device)

    print('Transformer has {:,} trainable parameters'.format(count_parames(model)))

    if args.load_model is not None:
        model.load(args.load_model)
    else:
        model.apply(init_weights)

    if args.mode == 'test':
        inferencer = Inferencer(model, train_data.vocab, device)
        greedy_out = inferencer.infer_greedy('helo world, I m testin a typo corector')
        print(greedy_out)
    elif args.mode == 'train':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        loss_function = nn.NLLLoss(ignore_index=pad_id)
        print('Started training...')
        train(model, train_loader, val_loader, optimizer, loss_function, device)
    else:
        raise ValueError('Mode not recognized')
        # embedding size
        max_length=100,
        hidden_units=512,
        dropout_rate=0.1,
        lr=0.0001,
        is_training=True)
    return params


arg = create_hparams()
arg.input_vocab_size = len(en_vocab)
arg.label_vocab_size = len(zh_vocab)
arg.is_training = False
arg.dropout_rate = 0.

g = Transformer(arg)
saver = tf.train.Saver()
de_zh_vocab = {v: k for k, v in zh_vocab.items()}

with tf.Session() as sess:
    saver.restore(sess, 'logs/model')
    for i in range(100):
        line = encoder_inputs[i * 1000]
        x = np.array(line)
        x = x.reshape(1, -1)
        de_inp = [[zh_vocab['<GO>']]]
        while True:
            y = np.array(de_inp)
            preds = sess.run(g.preds, {g.x: x, g.de_inp: y})
                      collate_fn=generateBatch)
testIter = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=True,
                      collate_fn=generateBatch)

### BUILD MODEL
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Transformer(
    embeddingSize=256,
    srcVocabSize=len(sourceVocab),
    trgVocabSize=len(targetVocab),
    srcPadIdx=PAD_IDX,
    numHeads=8,
    numEncoderLayers=3,
    numDecoderLayers=3,
    forwardExpansion=4,
    dropout=0.2,
    maxLen=350,
    device=device,
).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0003)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                       patience=10, verbose=True)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

### TRAIN AND EVALUATE
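# ReduceLROnPlateau only acts when it is fed a metric; a sketch of the call the
# training loop needs each epoch (the evaluate() helper and the loop shown here
# are assumptions, not code from this script):
# for epoch in range(NUM_EPOCHS):
#     train_one_epoch(model, trainIter, optimizer, criterion)
#     val_loss = evaluate(model, testIter, criterion)
#     scheduler.step(val_loss)  # cuts lr by `factor` after `patience` flat epochs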
def main(settings):
    """
    Translates a source language file (or STDIN) into a target language file
    (or STDOUT).
    """
    # Create the TensorFlow session.
    g = tf.Graph()
    with g.as_default():
        tf_config = tf.compat.v1.ConfigProto()
        tf_config.allow_soft_placement = True
        session = tf.compat.v1.Session(config=tf_config)

        # Load config file for each model.
        configs = []
        for model in settings.models:
            config = load_config_from_json_file(model)
            setattr(config, 'reload', model)
            configs.append(config)

        # Create the model graphs.
        logging.debug("Loading models\n")
        models = []
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                if config.model_type == "transformer":
                    model = TransformerModel(config)
                else:
                    model = rnn_model.RNNModel(config)
                model.sampling_utils = SamplingUtils(settings)
                models.append(model)

        # Add smoothing variables (if the models were trained with smoothing).
        # FIXME Assumes either all models were trained with smoothing or none were.
        if configs[0].exponential_smoothing > 0.0:
            smoothing = ExponentialSmoothing(configs[0].exponential_smoothing)

        # Restore the model variables.
        for i, config in enumerate(configs):
            with tf.compat.v1.variable_scope("model%d" % i) as scope:
                _ = model_loader.init_or_restore_variables(config, session,
                                                           ensemble_scope=scope)

        # Swap in the smoothed versions of the variables.
        if configs[0].exponential_smoothing > 0.0:
            session.run(fetches=smoothing.swap_ops)

        # FIXME Should be an option in settings
        max_translation_len = configs[0].translation_maxlen

        # Create a BeamSearchSampler / RandomSampler.
        if settings.translation_strategy == 'beam_search':
            sampler = BeamSearchSampler(models, configs, settings.beam_size)
        else:
            assert settings.translation_strategy == 'sampling'
            sampler = RandomSampler(models, configs, settings.beam_size)

        # Warn about the change from neg log probs to log probs for the RNN.
        if settings.n_best:
            model_types = [config.model_type for config in configs]
            if 'rnn' in model_types:
                logging.warn(
                    'n-best scores for RNN models have changed from '
                    'positive to negative (as of commit 95793196...). '
                    'If you are using the scores for reranking etc, then '
                    'you may need to update your scripts.')

        # Translate the source file.
        translate_utils.translate_file(
            input_file=settings.input,
            output_file=settings.output,
            session=session,
            sampler=sampler,
            config=configs[0],
            max_translation_len=max_translation_len,
            normalization_alpha=settings.normalization_alpha,
            nbest=settings.n_best,
            minibatch_size=settings.minibatch_size,
            maxibatch_size=settings.maxibatch_size)
def recognize(args):
    model, LFR_m, LFR_n = Transformer.load_model(args.model_path)
    print(model)
    model.eval()
    model.cuda()
    char_list, sos_id, eos_id = process_dict(args.dict)
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    tr_dataset = AudioDataset('test', args.batch_size)
    path_list = tr_dataset.path_lst
    label_list = tr_dataset.han_lst
    num_data = tr_dataset.path_count
    ran_num = random.randint(0, num_data - 1)

    num = args.count
    words_num = 0
    word_error_num = 0
    seq_error = 0
    data = ''

    with torch.no_grad():
        for index in range(num):
            try:
                print('\nthe ', index + 1, 'th example.')
                data += 'the ' + str(index + 1) + 'th example.\n'
                index = (ran_num + index) % num_data
                standard_label = label_list[index]
                feature, label = get_fbank_and_hanzi_data(
                    index, args.feature_dim, char_list, path_list, label_list)
                if len(feature) > 1600:
                    continue
                input = build_LFR_features(feature, args.LFR_m, args.LFR_n)
                input = torch.from_numpy(input).float()
                input_length = torch.tensor([input.size(0)], dtype=torch.int)
                input = input.cuda()
                nbest_hyps = model.recognize(input, input_length, char_list, args)
                pred_label = nbest_hyps[0]['yseq'][1:-1]
                pred_res = ''.join([char_list[index] for index in pred_label])
                print("stand:", label)
                print("pred :", pred_label)
                data += "stand:" + str(standard_label) + '\n'
                data += "pred :" + str(pred_res) + '\n'
                words_n = len(label)
                words_num += words_n
                word_distance = GetEditDistance(pred_label, label)
                if word_distance <= words_n:
                    word_error_num += word_distance
                else:
                    word_error_num += words_n
                if pred_label != label:
                    seq_error += 1
            except ValueError:
                continue

    print('WER = ', (1 - word_error_num / words_num) * 100, '%')
    print('CER = ', (1 - seq_error / args.count) * 100, '%')
    data += 'WER = ' + str((1 - word_error_num / words_num) * 100) + '%'
    data += 'CER = ' + str((1 - seq_error / args.count) * 100) + '%'
    with open('../../model_log/pred/test_' + str(args.count) + '.txt',
              'w', encoding='utf-8') as f:
        f.writelines(data)
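# A sketch of the edit-distance computation a helper like GetEditDistance
# typically performs (classic Levenshtein DP); the actual helper in this repo
# may differ in signature or normalization.
def levenshtein(a, b):
    # prev[j] holds the distance between the processed prefix of a and b[:j].
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

assert levenshtein("kitten", "sitting") == 3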
def _setup(self, config):
    print('NaruTrainer config:', config)
    os.chdir(config["cwd"])
    for k, v in config.items():
        setattr(self, k, v)
    self.epoch = 0

    if callable(self.text_eval_corpus):
        self.text_eval_corpus = self.text_eval_corpus()

    # Try to make all the runs the same, except for input orderings.
    torch.manual_seed(0)
    np.random.seed(0)

    assert self.dataset in [
        'dmv', 'dmv-full', 'census', 'synthetic', 'kdd', 'kdd-full',
        'url', 'url-tiny', 'dryad-urls', 'dryad-urls-small'
    ]
    if self.shuffle_at_data_level:
        data_order_seed = self.order_seed
    else:
        data_order_seed = None
    if self.dataset == 'dmv-full':
        table = datasets.LoadDmv(full=True, order_seed=data_order_seed)
    elif self.dataset == 'dmv':
        table = datasets.LoadDmv(order_seed=data_order_seed)
    elif self.dataset == 'synthetic':
        table = datasets.LoadSynthetic(order_seed=data_order_seed)
    elif self.dataset == 'census':
        table = datasets.LoadCensus(order_seed=data_order_seed)
    elif self.dataset == 'kdd':
        table = datasets.LoadKDD(order_seed=data_order_seed)
    elif self.dataset == 'kdd-full':
        table = datasets.LoadKDD(full=True, order_seed=data_order_seed)
    elif self.dataset == 'url-tiny':
        table = datasets.LoadURLTiny()
    elif self.dataset == 'dryad-urls':
        table = datasets.LoadDryadURLs()
    elif self.dataset == 'dryad-urls-small':
        table = datasets.LoadDryadURLs(small=True)
    self.table = table
    self.oracle = Oracle(table, cache_dir=os.path.expanduser("~/oracle_cache"))
    try:
        self.table_bits = Entropy(
            self.table,
            self.table.data.fillna(value=0).groupby(
                [c.name for c in table.columns]).size(), [2])[0]
    except Exception as e:
        print("Error computing table bits", e)
        self.table_bits = 0  # TODO(ekl) why does dmv-full crash on ec2

    fixed_ordering = None
    if self.special_orders <= 1:
        fixed_ordering = list(range(len(table.columns)))

    if self.entropy_order:
        assert self.num_orderings == 1
        res = []
        for i, c in enumerate(table.columns):
            bits = Entropy(c.name, table.data.groupby(c.name).size(), [2])
            res.append((bits[0], i))
        s = sorted(res, key=lambda b: b[0], reverse=self.reverse_entropy)
        fixed_ordering = [t[1] for t in s]
        print('Using fixed ordering:', '_'.join(map(str, fixed_ordering)))
        print(s)

    if self.order is not None:
        print('Using passed-in order:', self.order)
        fixed_ordering = self.order

    if self.order_seed is not None and not self.shuffle_at_data_level:
        if self.order_seed == "reverse":
            fixed_ordering = fixed_ordering[::-1]
        else:
            rng = np.random.RandomState(self.order_seed)
            rng.shuffle(fixed_ordering)
        print('Using generated order:', fixed_ordering)

    print(table.data.info())
    self.fixed_ordering = fixed_ordering

    table_train = table

    if self.special_orders > 0:
        special_orders = _SPECIAL_ORDERS[self.dataset][:self.special_orders]
        k = len(special_orders)
        seed = self.special_order_seed * 10000
        for i in range(k, self.special_orders):
            special_orders.append(
                np.random.RandomState(seed + i - k + 1).permutation(
                    np.arange(len(table.columns))))
        print('Special orders', np.array(special_orders))
    else:
        special_orders = []

    if self.use_transformer:
        args = {
            "num_blocks": 4,
            "d_model": 64,
            "d_ff": 256,
            "num_heads": 4,
            "nin": len(table.columns),
            "input_bins": [c.DistributionSize() for c in table.columns],
            "use_positional_embs": True,
            "activation": "gelu",
            "fixed_ordering": fixed_ordering,
            "dropout": False,
            "seed": self.seed,
            "first_query_shared": False,
            "prefix_dropout": self.prefix_dropout,
            "mask_scheme": 0,  # XXX only works for default order?
        }
        args.update(self.transformer_args)
        model = Transformer(**args).to(get_device())
    else:
        model = MakeMade(
            scale=self.fc_hiddens,
            cols_to_train=table.columns,
            seed=self.seed,
            dataset=self.dataset,
            fixed_ordering=fixed_ordering,
            special_orders=special_orders,
            layers=self.layers,
            residual=self.residual,
            embed_size=self.embed_size,
            dropout=self.dropout,
            per_row_dropout=self.per_row_dropout,
            prefix_dropout=self.prefix_dropout,
            fixed_dropout_ratio=self.fixed_dropout_ratio,
            input_no_emb_if_leq=self.input_no_emb_if_leq,
            disable_learnable_unk=self.disable_learnable_unk,
            embs_tied=self.embs_tied)

    child = None
    print(model.nin, model.nout, model.input_bins)

    blacklist = None
    mb = ReportModel(model, blacklist=blacklist)
    self.mb = mb

    if not isinstance(model, Transformer):
        print('applying weight_init()')
        model.apply(weight_init)

    if isinstance(model, Transformer):
        opt = torch.optim.Adam(
            list(model.parameters()) +
            (list(child.parameters()) if child else []),
            2e-4,
            betas=(0.9, 0.98),
            eps=1e-9,
        )
    else:
        opt = torch.optim.Adam(
            list(model.parameters()) +
            (list(child.parameters()) if child else []),
            2e-4)

    self.train_data = TableDataset(table_train)
    self.model = model
    self.opt = opt

    if self.checkpoint_to_load:
        self.model.load_state_dict(torch.load(self.checkpoint_to_load))
def runup(self):
    b = Browser()
    for i in xrange(100):
        print i
        b.get('http://wmtake.ru/m.base/bonus.php')
        captcha = b.js('$("#scode-pic img")[0].src')
        b.save(captcha, '/home/polzuka/inspirado/captcha/wmtake/%02d.gif' % i)

        t = Transformer()
        t.load('orig', b.image(captcha))
        t.resizeby('resize', t['orig'], 2, 2)
        t.grayscale('grayscale', t['resize'], 2)
        t.binarize('binarize', t['grayscale'], 150, CV_THRESH_BINARY_INV)
        '''
        radius = 3
        kernel = cvCreateStructuringElementEx(radius * 2 + 1, radius * 2 + 1,
                                              radius, radius, CV_SHAPE_ELLIPSE)
        t.morphology('morphology', t['binarize'], 1, 1, kernel)
        '''
        t.contourSplit('breaksplit', t['binarize'], 0.001)
        if len(t.symbols) != self.symbolqty:
            continue
        t.normolize('origsplit', 'breaksplit', 20, 30)
        t.savesymbols('origsplit', '/home/polzuka/inspirado/symbols/wmtake', '%02d' % i)
        del t
params = TransformerParams()
logger = get_logger('validation', params.experiment_dir)
logger.info("Logging to {}".format(params.experiment_dir))

# preprocess data
dataset = tf.data.Dataset.from_tensor_slices(
    (questions_encoded, answers_encoded))
input_data = dataset.take(params.num_examples) \
    .shuffle(questions_encoded.shape[0]) \
    .batch(params.batch_size) \
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
train_data = input_data.take(params.num_training_batches).repeat(params.num_epochs)
valid_data = input_data.skip(params.num_training_batches)

model = Transformer(params)
model.train(params, train_data, valid_data, logger)
# model.inference()

'''
HN NOTE:
For generalizability of the training pipeline, train steps should be methods
of the model, and individual train steps should output masked preds + targets;
the training loop itself should stay general. The loop should be similar to
the current one in lstm.py and contain:
- Tensorboard logging
- Validation loss + accuracy if i % n
- Early stopping check
- Outputting samples
- Model checkpointing
'''
def pickup(self):
    # create the browser we will use to crawl wmtake.ru
    b = Browser()
    # create the analyzer we will use to recognize the captcha
    a = Analyzer(self.site, self.symsize, self.charset)
    a.load()
    b.show()

    log.debug('LOADING PAGE WITH WM BONUS')
    b.get('http://wmtake.ru/m.base/bonus.php')

    while True:
        log.debug('SAVING CAPTCHA')
        captcha = b.js('$("#scode-pic img")[0].src')
        #b.save(captcha, '/home/polzuka/inspirado/captcha/wmtake/%02d.gif' % i)

        log.debug('CAPTCHA TRANSFORMING')
        try:
            t = Transformer('orig', b.image(captcha))
            t.resizeby('resize', t['orig'], 2, 2)
            t.grayscale('grayscale', t['resize'], 2)
            t.binarize('binarize', t['grayscale'], 150, CV_THRESH_BINARY_INV)
            t.contourSplit('breaksplit', t['binarize'], 0.001)
            if len(t.symbols) != self.symqty:
                raise Exception
        except Exception, e:
            log.debug(e)
            log.debug(colorize('INCORRECT SYMBOL NUMBER', RED))
            log.debug('LOADING PAGE WITH WM BONUS')
            b.get('http://wmtake.ru/m.base/bonus.php')
            continue

        t.normolize('origsplit', 'breaksplit', self.symsize)
        symbols = t.slice('origsplit')

        log.debug('RECOGNITION CAPTCHA')
        code = a.captcha(symbols)
        log.debug('ANALYZE RESULT: %s' % colorize(code))
        del t

        print code
        log.debug('FILLING FIELDS')
        b.js("$('#scode').val('%s')" % code)
        b.js("$('#purse').val('R%s')" % self.purse)
        b.js("$('div.news_box div.bn p').click()")
        b.sleep(10)

        if not b.js("$('#mess-exec:visible').length"):
            log.debug('FINISH')
            break
        log.debug('INCORRECT CAPTCHA RECOGNITION')
        log.debug('LOADING PAGE WITH WM BONUS')
        b.js("$('#mess-exec p').click()")
            print('Steps {} Loss {:.4f}'.format(s, train_loss.result()))
            self.train_step(self.train_iter.next())
        print('Steps {} Loss {:.4f}'.format(steps, train_loss.result()))
        self.model.save()
        print('model saved')
        print('training finished')


if __name__ == "__main__":
    #train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json',
    #                 r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
    #                 r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg',
    #                 r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json')
    train_data = VQA(
        r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json',
        r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
        r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg')
    train_iter = VQAIter(train_data,
                         train_data.getQuesIds(ansTypes=['other', 'yes/no']),
                         hp.batch_size, hp.num_chunks)

    max_qst_len = hp.max_qst_len
    max_ans_len = hp.max_ans_len

    model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff,
                        max_qst_len + 3, hp.dropout_rate)
    trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len)
    trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk,
                  hp.steps_per_report)
class Generator(nn.Module): def __init__(self, marker_num, neighbor_num, embed_dim, d_model, d_inner, d_q, d_k, d_v, n_head, candi_size, max_time, beta, cuda_id, dropout=0.1): super(Generator, self).__init__() # Modules self.encoder = Transformer(n_head, d_model, d_inner, d_q, d_k, d_v, cuda_id, dropout) self.time_embed_linear = nn.Linear(1, embed_dim) self.embed_linear = nn.Linear(embed_dim, d_model) self.embed_ac = nn.LeakyReLU() self.sample_linear_1 = nn.Linear(2 * embed_dim, embed_dim) self.wc = nn.LeakyReLU() self.sample_linear_2 = nn.Linear(embed_dim, 1) self.marker_linear = nn.Linear(embed_dim + d_model, 1) self.time_linear = nn.Linear(d_model, 1) # Constants self.max_time = max_time self.d_model = d_model self.cuda_id = cuda_id self.beta = beta self.embed_dim = embed_dim self.marker_num = marker_num self.sample_size = neighbor_num self.candi_size = candi_size # Descendants in an epoch self.neighbor_list = None self.neighbor_prob = None self.candidates = None def sample_neighbors(self, embedding): # Initialize descendants at the beginning of an epoch self.candidates = torch.multinomial( torch.ones(1, self.marker_num).cuda(self.cuda_id), self.candi_size) total_candidates = torch.stack( [self.candidates[0] for _ in range(self.marker_num)], 0) sel_candi_embeds = embedding[self.candidates][0] current_matrix = torch.stack( [sel_candi_embeds for _ in range(self.marker_num)], 0) total_matrix = torch.stack([embedding for _ in range(self.candi_size)], 1) matrix = torch.cat((current_matrix, total_matrix), 2) output_matrix = self.sample_linear_1(matrix) activated_out_matrix = self.wc(output_matrix) final_matrix = self.sample_linear_2(activated_out_matrix).squeeze(2) prob_res = torch.softmax(final_matrix, 1) # Initialize descendants and probabilities print(prob_res.size()) print(self.sample_size) neighbors = torch.multinomial(prob_res, self.sample_size) p_list = torch.gather(prob_res, 1, neighbors) self.neighbor_list = torch.gather(total_candidates, 1, neighbors) self.neighbor_prob = p_list def get_time_embedding(self, time): time_embed = self.time_embed_linear(time) time_embed_res = time_embed.unsqueeze(1) return time_embed_res def get_marker_embedding(self, marker, embedding): marker_embed_res = embedding[marker] return marker_embed_res def get_embedding(self, marker, time, embedding): time_vector = self.get_time_embedding(time) marker_vector = self.get_marker_embedding(marker, embedding) vector = marker_vector + self.beta * time_vector return vector def forward(self, marker_data, time_data, mask_data, embedding): # Forward Propagation d_size = time_data.size() # Initialize data self.index = 0 marker_res = marker_data[:, 0:1].clone() time_res = time_data[:, 0:1].clone() mask_res = mask_data[:, 0:1].clone() data_input = torch.zeros(d_size[0], 0, self.d_model).cuda(self.cuda_id) candidate_list = marker_data[:, 0:1] prob_list = torch.ones(d_size[0], 1).cuda(self.cuda_id) chosen_index = torch.zeros(d_size[0], 1, dtype=torch.long).cuda(self.cuda_id) neighbor_prob_record = torch.ones(d_size[0], 1).cuda(self.cuda_id) total_neighbor_prob = torch.ones(d_size[0], 1).cuda(self.cuda_id) total_sample_prob = torch.ones(d_size[0], 1).cuda(self.cuda_id) # Generating Cascades while self.index < d_size[1] - 1: last_marker = marker_res[:, self.index:self.index + 1] last_time = time_res[:, self.index:self.index + 1] new_vector = self.get_embedding(last_marker, last_time, embedding) model_dim_vector = self.embed_ac(self.embed_linear(new_vector)) data_input = torch.cat((data_input, model_dim_vector), 1) intensity 
= self.encoder.forward(data_input, self.index) # Time Decoding new_time = last_time + F.softplus( self.time_linear(intensity.squeeze(1))) # Causal Descendants time_res = torch.cat((time_res, new_time), 1) new_mask = torch.lt(new_time, self.max_time).float() neighbor = self.neighbor_list[last_marker].squeeze(1) prob_neighbor = self.neighbor_prob[last_marker].squeeze(1) neighbor_prob_record = torch.cat( (neighbor_prob_record, prob_neighbor), 1) # Intensity Function neighbor_inf = embedding[neighbor] intensity_inf = torch.stack([(intensity.squeeze(1)) for _ in range(self.sample_size)], 1) inf_matrix = torch.cat((neighbor_inf, intensity_inf), 2) # Marker Decoding marker_weight = self.marker_linear(inf_matrix).squeeze(2) marker_prob = torch.softmax(marker_weight, 1) candidate_list = torch.cat((candidate_list, neighbor[:, 1:]), 1) chosen_prob = torch.gather(prob_list, 1, chosen_index) attached_prob = chosen_prob * marker_prob for i in range(d_size[0]): prob_list[i][chosen_index[i]] = attached_prob[i][0] prob_list = torch.cat((prob_list, attached_prob[:, 1:]), 1) chosen_index = torch.multinomial(prob_list, 1) new_markers = torch.gather(candidate_list, 1, chosen_index) # Record Probabilities for BP selected_neighbor_prob = torch.gather(neighbor_prob_record, 1, chosen_index) total_neighbor_prob = torch.cat( (total_neighbor_prob, selected_neighbor_prob), 1) selected_sample_prob = torch.gather(prob_list, 1, chosen_index) total_sample_prob = torch.cat( (total_sample_prob, selected_sample_prob), 1) self.index += 1 # Mark down the Results marker_res = torch.cat((marker_res, new_markers), 1) mask_res = torch.cat((mask_res, new_mask), 1) return marker_res, time_res, mask_res, total_neighbor_prob, total_sample_prob def test_predict(self, test_marker, test_time, test_mask, true_neigh_list, embedding, type_eval): size = test_time.size() if type_eval: time_mse = [0, 0, 0, 0, 0] marker_correct_count = [0, 0, 0, 0, 0] marker_ttl_count = [0, 0, 0, 0, 0] else: time_mse = [0, 0, 0, 0] marker_correct_count = [0, 0, 0, 0] marker_ttl_count = [0, 0, 0, 0] # For each cascade for j in range(size[0]): # Get single marker, time and mask (1, 6) single_marker = test_marker[j:j + 1, :] single_time = test_time[j:j + 1, :] single_mask = test_mask[j:j + 1, :] if type_eval: if torch.sum(single_mask, 1).item() < 6: continue else: if torch.sum(single_mask, 1).item() < 5: continue length = torch.sum(single_mask, 1).item() for k in range(len(marker_ttl_count)): marker_ttl_count[k] += 1 sample_prob = torch.ones(1, 1).cuda(self.cuda_id) candidates = single_marker[:, 0:1] total_candidates = [] total_probabilities = [] data_input = torch.zeros(1, 0, self.d_model).cuda(self.cuda_id) last_index = 0 # First store previous informations for i in range(int(length)): total_candidates.append(candidates.clone()) total_probabilities.append(sample_prob.clone()) new_vector = self.get_embedding(single_marker[:, i:i + 1], single_time[:, i:i + 1], embedding) data_input = torch.cat((data_input, new_vector), 1) intensity = self.encoder.forward(data_input, i) if true_neigh_list[single_marker[0][i].item()] == []: empiri_list = [single_marker[0][i].item()] else: empiri_list = true_neigh_list[single_marker[0][i].item()] candidate_i = torch.LongTensor([empiri_list ]).cuda(self.cuda_id) neigh_size = len(empiri_list) neighbor_inf = embedding[candidate_i] intensity_inf = torch.stack([(intensity.squeeze(1)) for _ in range(neigh_size)], 1) inf_matrix = torch.cat((neighbor_inf, intensity_inf), 2) marker_weight = self.marker_linear(inf_matrix).squeeze(2) 
marker_prob = torch.softmax(marker_weight, 1) if last_index != -1: attach_prob = sample_prob[0][last_index].clone() sample_prob[0][last_index] = sample_prob[0][ last_index] * marker_prob[0][0] candidates[0][last_index] = candidate_i[0][0] new_marker_prob = attach_prob * marker_prob[:, 1:] sample_prob = torch.cat((sample_prob, new_marker_prob), 1) candidates = torch.cat((candidates, candidate_i[:, 1:]), 1) else: sample_prob = torch.cat((sample_prob, marker_prob), 1) candidates = torch.cat((candidates, candidate_i), 1) if i != length - 1: if single_marker[0][i + 1].item() in candidates.tolist()[0]: last_index = torch.argmax( candidates == single_marker[0][i + 1].item()).item() else: last_index = -1 for p in range(len(marker_ttl_count)): curr_id = int((length - 1) / len(marker_ttl_count) * p) new_vector = self.get_embedding( single_marker[:, curr_id:curr_id + 1], single_time[:, curr_id:curr_id + 1], embedding) data_input = torch.cat((data_input, new_vector), 1) intensity = self.encoder.forward(data_input, curr_id) # Calculate MSE delta_time = F.softplus(self.time_linear(intensity.squeeze(1))) time_bias = ((delta_time - (single_time[:, curr_id + 1:curr_id + 2] - single_time[:, curr_id:curr_id + 1])) * (delta_time - (single_time[:, curr_id + 1:curr_id + 2] - single_time[:, curr_id:curr_id + 1]))).item() time_mse[p] += time_bias # Calculate accu for markers new_vector = self.get_embedding( single_marker[:, curr_id + 1:curr_id + 2], single_time[:, curr_id + 1:curr_id + 2], embedding) data_input = torch.cat((data_input, new_vector), 1) intensity = self.encoder.forward(data_input, curr_id + 1) curr_candi = total_candidates[curr_id + 1] curr_prob = total_probabilities[curr_id + 1] samp_neighs = self.neighbor_list[single_marker[:, curr_id + 1:curr_id + 2]].squeeze(1) neighbor_inf = embedding[samp_neighs] intensity_inf = torch.stack([(intensity.squeeze(1)) for _ in range(self.sample_size)], 1) inf_matrix = torch.cat((neighbor_inf, intensity_inf), 2) marker_weight = self.marker_linear(inf_matrix).squeeze(2) marker_prob = torch.softmax(marker_weight, 1) if single_marker[0, curr_id + 1] in curr_candi.tolist()[0]: curr_index = torch.argmax( curr_candi == single_marker[0][curr_id + 1].item()).item() else: curr_index = -1 if curr_index != -1: attach_prob = curr_prob[0][curr_index] curr_candi[0][curr_index] = samp_neighs[0][0] curr_prob[0][curr_index] = attach_prob * marker_prob[0][0] next_candi = torch.cat((curr_candi, samp_neighs[:, 1:]), 1) next_prob = torch.cat((curr_prob, marker_prob[:, 1:]), 1) else: next_candi = torch.cat((curr_candi, samp_neighs), 1) next_prob = torch.cat((curr_prob, marker_prob), 1) predict = next_candi[0][torch.multinomial( next_prob, 1)[0][0].item()].item() if predict in total_candidates[curr_id + 1][0].tolist(): marker_correct_count[p] += 1 accu = list( map(lambda x: x[0] / x[1], zip(marker_correct_count, marker_ttl_count))) mse = list(map(lambda x: x[0] / x[1], zip(time_mse, marker_ttl_count))) return accu, mse
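# A minimal, self-contained sketch (CPU, toy sizes) of the descendant-sampling
# pattern used by Generator.sample_neighbors above: score each (marker, candidate)
# pair, softmax over candidates, sample without replacement, then gather the
# sampled probabilities. All sizes here are illustrative assumptions, not values
# from the original training setup.
import torch

marker_num, candi_size, sample_size = 6, 4, 2
scores = torch.randn(marker_num, candi_size)      # stand-in for sample_linear_2 output
prob = torch.softmax(scores, 1)                   # per-marker candidate distribution
neighbors = torch.multinomial(prob, sample_size)  # sampled candidate indices
p_list = torch.gather(prob, 1, neighbors)         # probabilities of the samples
print(neighbors.size(), p_list.size())            # torch.Size([6, 2]) twice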
class Extract(object): ''' classdocs ''' ignore = ['', 'php', 'json', 'js'] #load these words from tech terms dictionary breakchars = [''] def __init__(self, platformName, tracedir, pattern="*.json", appDomains=[]): ''' Constructor ''' self.platformName = platformName self.tracedir = tracedir self.pattern = pattern self.appDomains = appDomains self.traces = {} self.keywords = {} #Clustering Nodes self.allReq = [] #flattened trace used by new approach self.allItems = [] #flattened trace self.allSeq = [] self.cluster = [] self.clusterLabels = {} self.t = Transformer() #Distances self.dist = {} self.THRESH = utils.getDefaultThresh() #Graph fields self.nodes = [] self.edges = [] # Load Traces and Extract Keywords self.loadTraces() #self.extractKeywords() #self.eliminateRedundantKws() def printLoadStats(self): print self.platformName, "Load Stats::" print " - Traces:", len(self.traces), "\tRequests:", len(self.allReq) # reqCnt = 0; # for t in self.traces: # for r in self.traces[t]: # if 'REQUEST' in r: # reqCnt = reqCnt + 1 # print " - Requests:", reqCnt, ", average req/trace=", reqCnt/len(self.traces) def loadTraces(self): #print "Loading Traces in", self.tracedir, "with pattern", self.pattern os.chdir(self.tracedir) for tracefile in glob.glob(self.pattern): #print " - Loading", tracefile trace = open(tracefile) data = json.load(trace) self.traces[tracefile] = data cnt = 0 ignoredDomains = set() for k,v in sorted(self.traces.iteritems()): for action in v: if 'REQUEST' in action and utils.isCodeOrData(action): url = urlparse.urlparse(action['REQUEST']) if url.netloc in self.appDomains: action['ID'] = cnt self.allReq.append(action) cnt = cnt+1 else: ignoredDomains.add(url.netloc) if len(ignoredDomains) > 0: print "Ignored request from domains", ignoredDomains, "for", self.platformName #Not used def extractKeywords(self, ignoredKeywords=None): for k,v in sorted(self.traces.iteritems()): #print "-->",k,v traceKeywords = [] for action in v: if 'REQUEST' in action and utils.isCodeOrData(action): #print urlparse.urlparse(action['REQUEST']) ## url = urlparse.urlparse(action['REQUEST']) if url.netloc in self.appDomains: rel_uri = url.path + '?' 
+ url.query action['URI'] = rel_uri id = action['ID'] #get only path keywords # kws = re.compile('[\W]').split(url.path) #get only qs keys # kws = [re.compile('[\W]').split(url.path)] # qs_keys = urlparse.parse_qs(url.query).keys() # post_data = urllib.unquote(action['post_data']) # ps_keys = urlparse.parse_qs(post_data).keys() # kws.append[qs_keys, ps_keys] #get all words from keys and values qs = urlparse.parse_qs(url.query) qsw = [] for qk in qs.keys(): qsw.extend([qk, ",".join(qs[qk])]) #Fix, split words with separators like / (TODO: Add more separators) nqsw = [] for w in qsw: if '/' in w: nqsw.extend(w.split('/')) else: nqsw.append(w) qsw = nqsw postw = [] if 'post_data' in action: post_data = urllib.unquote(action['post_data']) ps = urlparse.parse_qs(post_data) for pk in ps.keys(): postw.extend([pk, ",".join(ps[pk])]) #print "POST_words::", postw kws = re.compile('[\W]').split(url.path) + qsw + postw #get all words combined # kws = re.compile('[\W]').split(rel_uri) #TODO fix these if ignoredKeywords != None: self.ignore.extend(ignoredKeywords) kws = filter(lambda x : x not in self.ignore, kws) kws = self.t.transform(kws) action['KEYWORDS'] = kws traceKeywords.append((rel_uri, kws, id)) self.keywords[k] = traceKeywords def eliminateRedundantKws(self): redundant = None for k,v in self.keywords.iteritems(): for tup in v: if redundant is None: redundant = set(tup[1]) else: redundant = redundant & set(tup[1]) keys = self.keywords.keys() for i in range(len(self.keywords)): k = keys[i] v = self.keywords[k] for j in range(len(v)): v[j] = (v[j][0], [t for t in v[j][1] if t not in redundant], v[j][2]) for k,v in self.traces.iteritems(): for a in v: if 'KEYWORDS' in a: a['KEYWORDS'] = [t for t in a['KEYWORDS'] if t not in redundant] def printKeywords(self): #print "Printing URIs" for k,v in sorted(self.keywords.iteritems()): print k for tup in v: print " %d. 
%s"%(tup[2],tup[1]) #print " ", tup[0] def recognizeActions(self): cluster = self.cluster def clusterActions(self, thresh=None): print "Matching keywords to generate associations" self.computeDistances() # for k, v in self.keywords.iteritems(): # for tup in v: # keywords = tup # idx = getMatchedNode(keywords) # if idx == None: # self.nodes.append((keywords)) if thresh: self.THRESH = thresh #agglomerative clustering cluster = self.cluster n=len(self.dist) for i in range(0,n-1): for j in range(i+1,n): if self.dist[i,j] < self.THRESH: # if a cluster already contains i icl = self.isPresent(cluster, i) if icl != None: icl.add(j) break # if a cluster contains j (rare) jcl = self.isPresent(cluster, j) if jcl is not None: jcl.add(i) else: cluster.append(set([i,j])) if not self.isPresent(cluster, i): cluster.append(set([i])) if not self.isPresent(cluster, n-1): cluster.append(set([n-1])) print " => Clusters:", cluster #print self.doCount(cluster) #self.printClusters(cluster) #print self.allSeq def printClusters(self): for cl in self.cluster: print cl for c in cl: print "\t", self.allItems[c] def assignLabelsUsingClustering(self, thresh=None): self.clusterActions(thresh) cluster = self.cluster #print cluster cl = self.clusterLabels labels = utils.getUUChars() for c in cluster: l = labels[0] cl[l] = c labels.remove(l) def printClusterLabels(self): cl = self.clusterLabels for k in cl: print k, ':', cl[k] def printLabeledTraces(self): for k,v in sorted(self.traces.iteritems()): print k for action in v: if 'ID' in action: print " ", self.getLabel(action['ID']), action['KEYWORDS'] # Print URL print "\t", if 'post_data' in action: print "POST", print action['URI'] def getLabel(self, actionNum): cl = self.clusterLabels for k in cl: if actionNum in cl[k]: return k print "## ERROR: cluster doesn't contain ", actionNum, ", platform:", self.tracedir def getLabels(self): return self.clusterLabels # Assign label based on other platform def assignLabels(self, labelMap, platform): cluster = self.cluster cl = self.clusterLabels pLabels = platform.getLabels() chars = utils.getUUChars() charList = chars[len(pLabels):] for c in cluster: labels = set([]) #print c, ":: ", for actionNum in c: if actionNum in labelMap: labels = labels.union(labelMap[actionNum]) #print "= Same cluster labels", labels label = None if len(labels) == 0: label = charList[0] charList.remove(label) else: sLabels = list(labels) sLabels.sort() label = sLabels[0] labels.remove(label) if len(labels) > 0: print "- merging desktop labels", utils.printSet(sLabels) #merge similar labels from other platform for l in labels: if l in pLabels and label in pLabels: pLabels[label] = pLabels[label].union(pLabels[l]) del pLabels[l] if label in cl: cl[label].update(c) else: cl[label] = c # Old code. 
Single label assigner # def assignLabels(self, labelMap, charList): # cluster = self.cluster # cl = self.clusterLabels # for c in cluster: # label = None # #print c, ":: ", # for actionNum in c: # if actionNum in labelMap: # label = iter(labelMap[actionNum]).next() #get first label # # TODO: check possibility: same cluster in mobile have different corresponding labels # break # if label is None: # label = charList[0] # charList.remove(label) # cl[label] = c def printActionLabel(self, actionNum, labelMap): print actionNum, for l in labelMap[actionNum]: print l, print def doCount(self, cluster): cnt = 0 for cl in cluster: cnt = cnt+len(cl) return cnt def isPresent(self, cluster, x): for cl in cluster: for c in cl: if c == x: return cl return None def computeDistances(self): for k, v in sorted(self.keywords.iteritems()): prev = None for tup in v: self.allItems.append((tup[0], tup[1], k)) cnt = len(self.allItems)-1 if prev != None: self.allSeq.append([prev, cnt, k]) prev = cnt n=len(self.allItems) self.dist = numpy.zeros(shape=(n,n)) for i in range(0,n): for j in range(0,n): try: self.dist[i,j] = jaccard_distance(set(self.allItems[i][1]), set(self.allItems[j][1])) except ZeroDivisionError: self.dist[i,j] = 0 #sys.maxint # fileName = os.getcwd().split('/')[-1]+".csv" # print self.platformName, "Distances saved to", fileName # numpy.savetxt(fileName, self.dist, '%.4e') def printGraphViz(self): print "*"*80 print "Graphviz Output::" print "*"*80 print "digraph {" num2Node = {} # Add Nodes cl = self.cluster for c in range(0, len(cl)) : lbl = set() for en in cl[c]: lbl.add(self.allItems[en][0]) num2Node[en] = "A%d"%(c) print " A%d [label=\"%s\"];"%(c, "\\n".join(lbl)) #Generate colors to be assigned to Edges colorMap = {} traceFiles = self.keywords.keys() N = len(traceFiles) HSV_tuples = [(x*1.0/N, 0.5, 0.5) for x in range(N)] colors = map(lambda x: colorsys.hsv_to_rgb(*x), HSV_tuples) #RGB colors for i in range(0, N): colorMap[traceFiles[i]] = colors[i] #print colorMap # Add Edges for edge in self.allSeq: c = colorMap[edge[2]] color = "#%02x%02x%02x"%(255*c[0], 255*c[1], 255*c[2]) print " ", num2Node[edge[0]], "->", num2Node[edge[1]], " [color=\"%s\", label=\"%s\", fontcolor=\"%s\"];"%(color, edge[2], color) print "}" def normalize(self, label): return re.sub("([a-z])([A-Z])","\g<1> \g<2>",label)
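# The clustering in Extract.clusterActions above thresholds pairwise Jaccard
# distances between request keyword sets; a tiny self-contained illustration
# using the same nltk metric (the keyword sets are made up for the example).
from nltk.metrics.distance import jaccard_distance

a = {'user', 'login', 'id'}
b = {'user', 'logout', 'id'}
print(jaccard_distance(a, b))  # 0.5 -> same cluster only if THRESH > 0.5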
class TranslationTransformer(object): def __init__(self, model_path, input_tokenizer_path, output_tokenizer_path): self.inp_tokenizer = spm.SentencePieceProcessor() self.inp_tokenizer.LoadFromFile(input_tokenizer_path) self.inp_tokenizer.SetEncodeExtraOptions('bos:eos') self.tar_tokenizer = spm.SentencePieceProcessor() self.tar_tokenizer.LoadFromFile(output_tokenizer_path) self.tar_tokenizer.SetEncodeExtraOptions('bos:eos') model_path = os.path.abspath(model_path) with open(os.path.join( model_path, 'transformer_description.json')) as transformer_desc_json: desc = json.load(transformer_desc_json) checkpoint_path = os.path.join(model_path, 'train') self.inp_dim = desc['inp_dim'] self.tar_dim = desc['tar_dim'] self.model = Transformer( num_layers=desc['num_layers'], d_model=desc['d_model'], dff=desc['dff'], num_heads=desc['num_heads'], dropout_rate=desc['dropout_rate'], universal=desc['universal'], shared_qk=desc['shared_qk'], inp_dim=self.inp_dim, inp_vocab_size=self.inp_tokenizer.vocab_size(), tar_dim=self.tar_dim, tar_vocab_size=self.tar_tokenizer.vocab_size(), inp_bos=self.inp_tokenizer.bos_id(), inp_eos=self.inp_tokenizer.eos_id(), tar_bos=self.tar_tokenizer.bos_id(), tar_eos=self.tar_tokenizer.eos_id(), ckpt_path=checkpoint_path) def parallel_tokenize_py(self, inp, tar): return self.inp_tokenizer.SampleEncodeAsIds(inp.numpy(), -1, 0.2),\ self.tar_tokenizer.SampleEncodeAsIds(tar.numpy(), -1, 0.2) def parallel_tokenize(self, inp, tar): return tf.py_function(self.parallel_tokenize_py, [inp, tar], [tf.int32, tf.int32]) def truncate_oversize_inputs(self, inp, tar): return inp[:self.inp_dim], tar def filter_max_len(self, inp, tar): return tf.logical_and( tf.size(inp) <= self.inp_dim, tf.size(tar) <= self.tar_dim) def create_parallel_dataset(self, inp_path, tar_path, batch_size=64, shuffle_buffer_size=10000, filter_oversize_targets=False): inp = tf.data.TextLineDataset(inp_path) tar = tf.data.TextLineDataset(tar_path) dataset = tf.data.Dataset.zip((inp, tar)) dataset = dataset.shuffle(shuffle_buffer_size, reshuffle_each_iteration=True) dataset = dataset.map(self.parallel_tokenize) dataset = dataset.map(self.truncate_oversize_inputs) if filter_oversize_targets: dataset = dataset.filter(self.filter_max_len) dataset = dataset.padded_batch(batch_size, (self.inp_dim, self.tar_dim)) return dataset def train(self, train_inp_path, train_tar_path, val_inp_path, val_tar_path, batch_size=64, num_epochs=100, shuffle_buffer_size=10000): train_set = self.create_parallel_dataset( train_inp_path, train_tar_path, batch_size=batch_size, shuffle_buffer_size=shuffle_buffer_size, filter_oversize_targets=True) val_set = self.create_parallel_dataset( val_inp_path, val_tar_path, batch_size=batch_size, shuffle_buffer_size=shuffle_buffer_size, filter_oversize_targets=True) self.model.train(train_set, val_set, num_epochs=num_epochs) def __call__(self, inputs, beam_width=10): inp_tok = [self.inp_tokenizer.EncodeAsIds(inp) for inp in inputs] inp_pad = tf.keras.preprocessing.sequence.pad_sequences( inp_tok, maxlen=self.inp_dim, padding='post', truncating='post', dtype='int32') inp_tensor = tf.convert_to_tensor(inp_pad) pred_tar = self.model.translate_batch(inp_tensor, beam_width=beam_width).numpy() ends = tf.argmax(tf.cast( tf.equal(pred_tar, self.tar_tokenizer.eos_id()), tf.float32), axis=1).numpy() + 1 pred_detok = [] for i in range(len(pred_tar)): pred_detok.append( self.tar_tokenizer.DecodeIds(pred_tar[i, :ends[i]].tolist())) return pred_detok
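# Minimal sketch of the tf.data pipeline pattern used by
# create_parallel_dataset above: zip parallel sources, drop oversize pairs,
# then pad to fixed dimensions. In-memory toy id sequences stand in for the
# tokenized TextLineDatasets (an assumption made for the example).
import tensorflow as tf

inp = tf.data.Dataset.from_generator(
    lambda: iter([[1, 2, 3], [4, 5]]),
    output_signature=tf.TensorSpec([None], tf.int32))
tar = tf.data.Dataset.from_generator(
    lambda: iter([[7, 8], [9]]),
    output_signature=tf.TensorSpec([None], tf.int32))
ds = tf.data.Dataset.zip((inp, tar))
ds = ds.filter(lambda i, t: tf.logical_and(tf.size(i) <= 4, tf.size(t) <= 4))
ds = ds.padded_batch(2, padded_shapes=([4], [4]))
for i, t in ds:
    print(i.shape, t.shape)  # (2, 4) (2, 4)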
def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(args.train_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size,
                              args.maxlen_in, args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc,
                      args.n_head, args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(sos_id, eos_id, vocab_size,
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)
    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
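# The TransformerOptimizer above presumably wraps Adam with the warmup schedule
# from "Attention Is All You Need" (an assumption based on its k/d_model/
# warmup_steps arguments): the learning rate ramps up linearly for warmup_steps,
# then decays with the inverse square root of the step.
def noam_lr(step, k=1.0, d_model=512, warmup_steps=4000):
    return k * d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

print(noam_lr(1), noam_lr(4000), noam_lr(40000))  # ramp up, peak, decay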
def scmoplot(root_path, user_ps): ps = dict(default_ps) ps.update(user_ps) ng = NameGleaner(scan=r'scan=(\d+)', x=r'x=(\d+)', y=r'y=(\d+)', averaged=r'(averaged)') tfmr = Transformer(gleaner=ng) tfmr.add(10, tfms.scale, params={'xsc': 0.1}) tfmr.add(20, tfms.flatten_saturation, params={'threshold': ps['thresh'], 'polarity': '+'}) tfmr.add(25, tfms.center) tfmr.add(30, tfms.wrapped_medfilt, params={'ks': ps['filt_ks']}) tfmr.add(40, tfms.saturation_normalize, params={'thresh': ps['thresh']}) tfmr2 = Transformer(gleaner=ng) tfmr2.add(10, tfms.scale, params={'xsc': 0.1}) tfmr2.add(30, tfms.wrapped_medfilt, params={'ks': ps['filt_ks']}) tfmr2.add(40, tfms.clean) clust = Cluster(join(root_path, 'parameters.xml')).to_dict() gx, gy = (clust['Rows'], clust['Cols']) fig, axarr = plt.subplots(ncols=gx, nrows=gy, figsize=(10, 10)) for row in axarr: for ax in row: ax.xaxis.set_ticklabels([]) ax.yaxis.set_ticklabels([]) #ax.set_xlim(-ps['xlim'], ps['xlim']) #ax.set_ylim(-ps['ylim'], ps['ylim']) Hcs = [[None for i in range(gx)] for i in range(gy)] Mrs = [[None for i in range(gx)] for i in range(gy)] for f in listdir(root_path): gleaned = ng.glean(f) if gleaned['averaged']: print('Plotting %s' % f) x, y = int(gleaned['x']), int(gleaned['y']) ax = axarr[y, x] Bi, Vi = np.loadtxt(join(root_path, f), usecols=(0, 1), unpack=True, skiprows=7) B,V = tfmr((Bi,Vi),f) B2, V2 = tfmr2((Bi, Vi), f) ##data set 2 graphs lslope,rslope,tan=tfms.x0slope(B2,V2) lsat,rsat=tfms.sat_field(B2,V2) area = tfms.loop_area(B2,V2) data = ax.plot(B2,V2,'k') tanlines = ax.plot(tan[0],tan[1],'r',tan[2],tan[3],'y*',tan[4],tan[5],'b',tan[6],tan[7], 'y*') satfields = ax.plot(B2[lsat],V2[lsat],'ro',B2[rsat],V2[rsat],'go') areatext = ax.text(B2.min(),V2.max(), ("loop area: "+str(area+.0005)[0:6])) rax = plt.axes([0.05, 0.4, 0.1, 0.15]) check = CheckButtons(rax, ('data', 'tangent lines', 'saturation points', 'loop area'), (True, True, True, True)) def func(label): if label == 'data': toggle(data) elif label == 'tangent lines': toggle(tanlines) elif label == 'saturation points': toggle(satfields) elif label == 'loop area': areatext.set_visible(not areatext.get_visible()) plt.draw() check.on_clicked(func) try: Hc = tfms.Hc_of(B, V, fit_int=(ps['thresh'], ps['max'])) Hcs[y][x] = Hc Mr = tfms.Mrem_of(B, V, fit_int=(ps['thresh'], ps['max'])) Mrs[y][x] = Mr zs = np.zeros(3) ax.plot(zs, Mr, 'ro', ms=7) ax.plot(Hc, zs, 'ro', ms=7) except Exception as e: print('\t{}'.format(e)) Hcs[y][x] = 0.0 Mrs[y][x] = 0.0 plt.tight_layout(w_pad=0, h_pad=0) plt.show() Hcs = np.array([x[1] for row in Hcs for x in row]).reshape(gy, gx) Mrs = np.array([x[1] for row in Mrs for x in row]).reshape(gy, gx) gs = GridSpec(10, 10) ax0 = plt.subplot(gs[0:9, :5]) ax1 = plt.subplot(gs[9, :5]) ax2 = plt.subplot(gs[0:9, 5:]) ax3 = plt.subplot(gs[9, 5:]) fig = ax0.get_figure() fig.set_size_inches(12, 8) # Plot Hc pcolor map n = Normalize(vmin=0.0, vmax=5.0, clip=True) res = ax0.pcolor(Hcs, cmap='afmhot', norm=n, edgecolors='k') plt.colorbar(res, cax=ax1, orientation='horizontal', ticks=(0, 2.5, 5)) # Plot Mr pcolor map n = Normalize(vmin=0.0, vmax=1.0, clip=True) res = ax2.pcolor(Mrs, cmap='afmhot', norm=n, edgecolors='k') plt.colorbar(res, cax=ax3, orientation='horizontal', ticks=(0, 0.5, 1)) ax0.set_title('Hc (mT)') ax0.set_aspect('equal', adjustable='box') ax2.set_title('Mrem/Msat') ax2.set_aspect('equal', adjustable='box') plt.tight_layout() plt.show()
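# scmoplot above calls a toggle() helper that is not shown in this snippet;
# a minimal sketch of what it presumably does (an assumption): flip the
# visibility of every matplotlib artist returned by ax.plot().
def toggle(artists):
    for artist in artists:
        artist.set_visible(not artist.get_visible())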
def train(args):
    print("Start Time:\t{}".format(time.ctime()))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model1 = Transformer()
    model2 = Transformer()
    state_dict1 = torch.load(args.model1)
    state_dict2 = torch.load(args.model2)
    model1.load_state_dict(state_dict1)
    model2.load_state_dict(state_dict2)
    model1.to(device)
    model2.to(device)
    vgg = VGG16().to(device)
    train_dataset = datasets.ImageFolder(
        args.datapath,
        transforms.Compose([
            transforms.Resize(args.image_size),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)
    transformer = Transformer(norm='instance', padding='reflect').to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    loss = []
    run_time = time.strftime("%d-%H-%M-%S")
    for epoch_num in range(args.epochs):
        transformer.train()
        agg_one_loss = 0.0
        agg_two_loss = 0.0
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            content = x.to(device)
            y_hat = transformer(content)
            y_model1 = model1(content)
            y_model2 = model2(content)
            features_yh = vgg(normalize(y_hat))
            features_y1 = vgg(normalize(y_model1))
            features_y2 = vgg(normalize(y_model2))
            # Match y_hat to each teacher on the outputs of the VGG blocks;
            # a plain Python sum keeps the losses in the autograd graph.
            one_loss = sum(
                mse_loss(feat_yh, feat_y1)
                for feat_yh, feat_y1 in zip(features_yh.values(), features_y1.values()))
            two_loss = sum(
                mse_loss(feat_yh, feat_y2)
                for feat_yh, feat_y2 in zip(features_yh.values(), features_y2.values()))
            total_loss = one_loss + two_loss
            total_loss.backward()
            optimizer.step()
            agg_one_loss += one_loss.item()
            agg_two_loss += two_loss.item()
            if (batch_id + 1) % args.log_interval == 0:
                mesg = "[{}/{}]\tTotal: {:.2f}\tModel 1: {:.2f}\tModel 2: {:.2f}".format(
                    count, len(train_dataset),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1),
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                )
                print(mesg)
                loss.append([
                    batch_id + 1,
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1)
                ])
            if args.checkpoint_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(epoch_num + 1) + \
                    "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()
    save_loss_plot(np.array(loss), args.log_dir + '/train_loss{}.jpg'.format(run_time))
    # save model and parameter log
    transformer.eval().cpu()
    if args.savename is None:
        save_model_filename = "epoch_" + str(args.epochs) + "_" + \
            str(time.strftime("%d-%H-%M-%S")) + ".model"
    else:
        save_model_filename = args.savename
    save_model_path = os.path.join(args.save_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    # save loss in pickle file
    with open('{}/loss{}'.format(args.log_dir, run_time), 'wb') as fp:
        pickle.dump(loss, fp)
    with open('{}/param_log{}.txt'.format(args.log_dir, run_time), 'w') as f:
        f.write("Epochs: {}\n".format(args.epochs))
        f.write("Batch Size: {}\n".format(args.batch_size))
        f.write("Dataset: {}\n".format(args.datapath))
        f.write("Learning Rate: {}\n".format(args.lr))
        f.write("Model 1: {}\n".format(args.model1))
        f.write("Model 2: {}\n".format(args.model2))
    print("\nDone, trained model saved at", save_model_path)
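# Self-contained sketch of the feature-matching loss computed in train() above:
# MSE between corresponding VGG feature maps, summed across blocks with a plain
# Python sum so autograd is preserved. The random tensors and channel sizes are
# stand-ins for real VGG16 activations (an assumption made for the example).
import torch

mse_loss = torch.nn.MSELoss()
features_yh = [torch.randn(1, c, 8, 8, requires_grad=True) for c in (64, 128, 256)]
features_y1 = [torch.randn(1, c, 8, 8) for c in (64, 128, 256)]
one_loss = sum(mse_loss(a, b) for a, b in zip(features_yh, features_y1))
one_loss.backward()
print(one_loss.item())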
def convert_blueprint(layers, details, startpos, transform_str, output_mode, output_title, visualize): """ Transforms the provided layers if required by transform_str, then renders keystrokes/macros required to plot or visualize the blueprint specified by layers and details and pursuant to args. """ # apply aliases.txt to blueprint contents # TODO abstract this better alii = aliases.load_aliases( os.path.join(exetest.get_main_dir(), 'config/aliases.txt')) layers = aliases.apply_aliases(layers, alii) # transform the blueprint ztransforms = [] if transform_str: logmsg('transform', 'Transforming with: %s' % transform_str) newphase, transforms, ztransforms = \ transformer.parse_transform_str(transform_str) if newphase is not None: details['build_type'] = buildconfig.get_full_build_type_name(newphase) tran = Transformer(layers, details['start']) tran.transform(transforms) # do the x/y transformations details['start'] = tran.start layers = tran.layers logmsg('file', 'Results of transform:') loglines('file', lambda: FileLayer.str_layers(layers)) layers = FileLayers_to_GridLayers(layers) if not layers: # empty blueprint handling raise BlueprintError("Blueprint appears to be empty.") # override starting position if startpos command line option was given if startpos is not None: details['start'] = parse_startpos(startpos, layers[0].grid.width, layers[0].grid.height) # convert layers and other data to Blueprint bp = Blueprint('', layers, details) # get keys/macrocode to outline or plot the blueprint keys = [] if output_mode == 'csv': bp.analyze() # perform any awaiting z-transforms layers = bp.repeat_ztransforms(ztransforms, bp.layers, Blueprint.repeater_layers) bp.layers = layers output = str(bp) else: if visualize: keys = bp.trace_outline() else: bp.analyze() keys = bp.plot(ztransforms) output = keystroker.convert_keys(keys, output_mode, output_title) loglines('summary', lambda: str_summary(bp, keys)) return output
a[j] = 0 if j == j1: j1 -= 1 else: if j == j0: j0 += 1 p = 0.0 current = 0 i = n while current < x: i -= 1 current += 1 p += a[i] if math.fabs(p - 1) < 1e-10: p = 1 return p sx = [int(x) for x in open("sequence.txt")] if (max(sx) > 1): t = Transformer(sx) s = t.toUniform(0, 1) chi(s) serial(s) gap(s) poker(s) permutation(s) monotonic(s) conflict(s) nb = input()
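# The statistical tests above consume a [0, 1) sequence produced by
# Transformer.toUniform; a minimal stand-in (an assumption about its behavior):
# rescale the integers by one more than the observed maximum.
def to_uniform(seq):
    bound = max(seq) + 1
    return [x / float(bound) for x in seq]

print(to_uniform([3, 1, 4, 1, 5]))  # values strictly below 1.0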
def evaluate_transformer(): tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(os.path.join(output_path, tag_new_tok + "tokenizer_en_" + str(DICT_SIZE))) tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(os.path.join(output_path, tag_new_tok + "tokenizer_de_" + str(DICT_SIZE))) input_vocab_size = tokenizer_de.vocab_size + 2 target_vocab_size = tokenizer_en.vocab_size + 2 transformer1 = Transformer(num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, pe_input=input_vocab_size, pe_target=target_vocab_size, rate=dropout_rate) ckpt = tf.train.Checkpoint(transformer1=transformer1) ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial() print('Latest checkpoint restored!!') examples, metadata = tfds.load('wmt14_translate/de-en', data_dir=data_path, with_info=True, as_supervised=True) test_examples = examples['test'] def predict(inp_sentence): start_token = [tokenizer_de.vocab_size] end_token = [tokenizer_de.vocab_size + 1] # inp sentence is german, hence adding the start and end token inp_sentence = start_token + tokenizer_de.encode(inp_sentence) + end_token encoder_input = tf.expand_dims(inp_sentence, 0) # as the target is english, the first word to the transformer should be the # english start token. decoder_input = [tokenizer_en.vocab_size] output = tf.expand_dims(decoder_input, 0) # predictions.shape == (batch_size, seq_len, vocab_size) def symbols_to_logits(output): batched_input = tf.tile(encoder_input, [beam_width, 1]) enc_padding_mask, combined_mask, dec_padding_mask = create_masks( batched_input, output) predictions, attention_weights = transformer1(batched_input, output, False, enc_padding_mask, combined_mask, dec_padding_mask) predictions = predictions[:, -1, :] return predictions finished_seq, finished_scores, states= beam_search(symbols_to_logits, output, beam_width, MAX_LENGTH, target_vocab_size, alpha, states=None, eos_id=tokenizer_en.vocab_size+1, stop_early=True, use_tpu=False, use_top_k_with_unique=True) return finished_seq[0, 0, :] def translate(sentence): result = predict(sentence) predicted_sentence = tokenizer_en.decode([i for i in result if i < tokenizer_en.vocab_size]) print('Input: {}'.format(sentence)) print('Predicted translation: {}'.format(predicted_sentence)) return predicted_sentence translations = [] inputs = [] targets = [] BLEUs = [] for sentence in test_examples: inp = sentence[0].numpy().decode('utf-8') target = sentence[1].numpy().decode('utf-8') translation = translate(inp) BLEU = nltk.translate.bleu_score.sentence_bleu([nltk.word_tokenize(target)], nltk.word_tokenize(translation)) translations.append(translation) inputs.append(inp) BLEUs.append(BLEU) print('Average BLEU score: ', 100 * np.mean(BLEUs)) targets.append(target) d = {'input': inputs, 'target': targets, 'translation': translations, 'BLEU': BLEUs} df = pd.DataFrame.from_dict(d) df.to_csv(os.path.join(output_path, 'results_'+experiment_name+'.csv')) print('Average BLEU score: ', 100 * np.mean(BLEUs))
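# Sentence-level BLEU as used in the evaluation loop above, on a toy pair
# (whitespace tokenization here instead of nltk.word_tokenize, to keep the
# example free of the punkt download).
import nltk

reference = "the cat sat on the mat".split()
hypothesis = "the cat sat on a mat".split()
print(nltk.translate.bleu_score.sentence_bleu([reference], hypothesis))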
parser.add_argument('--submit', default=False, action='store_true', dest='submit')
parser.add_argument('--preconcat', default=False, action='store_true', dest='preconcat')
parser.add_argument('--eval', default=False, action='store_true', dest='eval')
parser.add_argument('--linear', default=False, action='store_true', dest='linear')
parser.add_argument('--bin', default=False, action='store_true', dest='bin')
parser.add_argument('--scale', default=False, action='store_true', dest='scale')
parser.add_argument('--print-coef', default=False, action='store_true', dest='print_coef')
parser.add_argument('--output', default='c1_model.pkl', dest='output')
args = parser.parse_args()

train_data = pd.read_csv('trainingData-release.csv')
submit_data = pd.read_csv('scoringData-release.csv')
data = pd.concat([train_data, submit_data], ignore_index=True)

transformer = Transformer(include_binned=args.bin, scale=args.scale)
X_full = transformer.fit_transform(data)
X = X_full[:len(train_data)]
X_test = X_full[len(train_data):]
y = train_data['resp.simple'].map(lambda x: 1 if x == 'CR' else 0)

rf_model = GradientBoostingClassifier(n_estimators=500)
lr_model = LogisticRegression(penalty='l2')
model = lr_model if args.linear else rf_model

if args.eval:
    logging.info("Running cross-validation...")
    eval_model(model, X, y, transformer, args.print_coef)
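# The concat-then-split pattern above keeps the training and scoring frames
# encoded with identical columns; a minimal illustration with pandas
# get_dummies standing in for the project's Transformer (an assumption).
import pandas as pd

train = pd.DataFrame({'c': ['a', 'b']})
score = pd.DataFrame({'c': ['b', 'c']})
full = pd.get_dummies(pd.concat([train, score], ignore_index=True))
X, X_test = full[:len(train)], full[len(train):]
print(list(full.columns))  # ['c_a', 'c_b', 'c_c'] for both splits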
from prefect import Flow, task
from extracter import Extracter
from transformer import Transformer
from loader import Loader

with Flow("ETL") as flow:
    url = 'https://www.marketbeat.com/stocks/NASDAQ/MSFT/price-target/?MostRecent=0'
    e = Extracter(url).extract()
    df = Transformer().transform(text=e)
    l = Loader().load(df)

flow.run()
from transformer import Transformer
import torch

transformer_model = Transformer(nhead=4, num_encoder_layers=2)
src = torch.rand((10, 32, 512))  # (seq_len, batch, d_model): 10 source tokens, batch of 32, 512-dim embeddings
tgt = torch.rand((20, 32, 512))
out, loss = transformer_model(src, tgt)
print(out.shape)
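# For comparison, the built-in torch.nn.Transformer with the same shapes
# returns only the decoded sequence; the custom Transformer above evidently
# bundles a loss term as well.
import torch
import torch.nn as nn

m = nn.Transformer(d_model=512, nhead=4, num_encoder_layers=2, num_decoder_layers=2)
out = m(torch.rand(10, 32, 512), torch.rand(20, 32, 512))
print(out.shape)  # torch.Size([20, 32, 512])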
class Manipulation(object):
    def __init__(self, yaml=None):
        self.tf = Transformer()
        moveit_commander.roscpp_initialize(sys.argv)
        self.__arm_group = moveit_commander.MoveGroupCommander("arm")
        self.__arm_group.set_planning_time(5)
        self.__gripper_group = moveit_commander.MoveGroupCommander("gripper")
        self.__base_group = moveit_commander.MoveGroupCommander("base")
        self.__base_group.set_planning_time(2.5)
        self.__arm_base_group = moveit_commander.MoveGroupCommander("arm_base")
        self.__arm_base_group.set_planning_time(10)
        rospy.wait_for_service('/plan_kinematic_path')
        self.__plan_service = rospy.ServiceProxy('/plan_kinematic_path', GetMotionPlan)
        self.__planning_scene_interface = PlanningSceneInterface()
        euroc_interface_node = '/euroc_interface_node/'
        self.__set_object_load_srv = rospy.ServiceProxy(euroc_interface_node + 'set_object_load', SetObjectLoad)
        self.__manService = ManipulationService()
        rospy.sleep(1)
        self.__planning_scene_interface.add_yaml(yaml)
        self.__grasp = None
        self.__collision_object_buffer = []
        rospy.loginfo("Manipulation started.")

    def __del__(self):
        # self.print_manipulation()
        if moveit_commander is not None:
            moveit_commander.roscpp_shutdown()
            moveit_commander.os._exit(0)

    def set_turbo_mode(self):
        self.__manService.set_turbo_mode()

    def print_manipulation(self):
        # print "current joint state"
        # print self.get_current_joint_state()
        # print "current planning scene"
        # print self.get_planning_scene().get_planning_scene()
        pass

    def move_base(self, goal_pose):
        """
        Moves the arm's base to the goal position. (Don't use this for Task 1 and 2)
        :param goal_pose: goal position
        :type: PoseStamped
        :return: success of the movement
        :type: bool
        """
        goal = [goal_pose.pose.position.x, goal_pose.pose.position.y]
        rospy.logdebug("Move base to: " + str(goal))
        m = self.__base_group.get_current_joint_values()
        d1 = goal[0] - m[0]
        d2 = goal[1] - m[1]
        if 0 <= abs(d1) <= 0.01 and 0 <= abs(d2) <= 0.01:
            rospy.loginfo("No movement required.")
            return True
        self.__base_group.set_joint_value_target(goal)
        path = self.__base_group.plan()
        return self.__manService.move(path)

    def transform_to(self, pose_target, target_frame="/odom_combined"):
        """
        Transforms the pose_target into the target_frame.
        :param pose_target: object to transform
        :type: PoseStamped/PointStamped/Vector3Stamped/CollisionObject
        :param target_frame: goal frame id
        :type: str
        :return: transformed object
        :type: same as pose_target
        """
        return self.tf.transform_to(pose_target, target_frame)

    def move_to(self, goal_pose, do_not_blow_up_list=()):
        """
        Moves the end effector to the goal position, without moving the base.
        :param goal_pose: goal position
        :type: PoseStamped
        :return: success of the movement
        :type: bool
        """
        rospy.logdebug("move_to called!")
        return self.__move_group_to(goal_pose, self.__arm_group, do_not_blow_up_list)

    def move_arm_and_base_to(self, goal_pose, do_not_blow_up_list=()):
        """
        Moves the end effector to the goal position.
        (Don't use this for Task 1 and 2)
        :param goal_pose: goal position
        :type: PoseStamped
        :return: success of the movement
        :type: bool
        """
        if type(goal_pose) is PoseStamped and ((math.isnan(goal_pose.pose.orientation.x) or
                                                math.isnan(goal_pose.pose.orientation.y) or
                                                math.isnan(goal_pose.pose.orientation.z) or
                                                math.isnan(goal_pose.pose.orientation.w))):
            rospy.loginfo('move_arm_and_base to goal pose with nan in orientation!')
            goal_pose.pose.orientation.x = 0.0
            goal_pose.pose.orientation.y = 0.0
            goal_pose.pose.orientation.z = 0.0
            goal_pose.pose.orientation.w = 1.0
        return self.__move_group_to(goal_pose, self.__arm_base_group, do_not_blow_up_list)

    def __move_group_to(self, goal_pose, move_group, do_not_blow_up_list=()):
        """
        :param goal_pose: the pose to be reached
        :type: PoseStamped/str/[float]
        :param move_group: the move group which shall be moved
        :type: MoveGroupCommander
        :param do_not_blow_up_list: list of objects whose size should not be increased during planning, "all" for all objects
        :type: [str]
        :return: success
        :type: bool
        """
        self.blow_up_objects(do_not_blow_up_list)
        path = self.__plan_group_to(goal_pose, move_group, None)
        ret = self.move_with_plan_to(path)
        return ret

    def move_with_plan_to(self, plan):
        """
        Executes a plan.
        :param plan: plan to execute
        :type: GetMotionPlanResponse
        :return: success
        :type: bool
        """
        if plan is None:
            return False
        if type(plan) is RobotTrajectory:
            return self.__manService.move(plan)
        self.blow_down_objects()
        return self.__manService.move(plan.motion_plan_response.trajectory)

    def get_timing_for_path(self, path):
        if path is None:
            return False
        if type(path) is RobotTrajectory:
            timing_list = self.__manService.get_timing(path, path.trajectory.joint_trajectory.points[0].positions)
        else:
            timing_list = self.__manService.get_timing(path.motion_plan_response.trajectory,
                                                       path.motion_plan_response.trajectory.joint_trajectory.points[0].positions)
        time = timing_list[-1] - timing_list[0]
        return time

    def plan_arm_to(self, goal_pose, start_state=None):
        """
        Plans from start_state to goal_pose with the arm group.
        :param goal_pose: the goal to be reached
        :type: PoseStamped (eef pose)/str (named pose)/[float] (joint state)
        :param start_state: the robot state from which the generated plan will start
        :type: RobotState, None for current state
        :return: plan
        :type: GetMotionPlanResponse or None if no plan found
        """
        return self.__plan_group_to(goal_pose, self.__arm_group, start_state)

    def plan_arm_and_base_to(self, goal_pose, start_state=None):
        """
        Plans from start_state to goal_pose with the arm_base group.
        :param goal_pose: the goal to be reached
        :type: PoseStamped (eef pose)/str (named pose)/[float] (joint state)
        :param start_state: the robot state from which the generated plan will start
        :type: RobotState, None for current state
        :return: plan
        :type: GetMotionPlanResponse or None if no plan found
        """
        return self.__plan_group_to(goal_pose, self.__arm_base_group, start_state)

    def __plan_group_to(self, goal_pose, move_group, start_state):
        """
        Plans from start_state to goal_pose with the move_group group.
        :param goal_pose: the goal to be reached
        :type: PoseStamped (eef pose)/str (named pose)/[float] (joint state)
        :param start_state: the robot state from which the generated plan will start
        :type: RobotState, None for current state
        :return: plan
        :type: GetMotionPlanResponse or None if no plan found
        """
        move_group.set_start_state_to_current_state()
        goal = deepcopy(goal_pose)
        if type(goal) is str:
            # use the normal planner
            # TODO use planning service to avoid collisions with attached objects
            move_group.set_named_target(goal)
            plan = move_group.plan()
            return plan
        elif type(goal) is PoseStamped:
            visualize_pose(goal)
            # Rotate the goal so that the gripper points from 0,0,0 to 1,0,0 with a 0,0,0,1 quaternion as orientation.
            goal.pose.orientation = rotate_quaternion(goal.pose.orientation, pi / 2, pi, pi / 2)
            if goal.header.frame_id != "/odom_combined":
                goal = self.tf.transform_to(goal)
        plan = self.plan(move_group, goal, start_state, max_motion_time)
        if plan is not None:
            # plan twice and concatenate the plans, to get closer to the goal position
            plan2 = self.plan(move_group, goal, self.get_end_state(plan), 1)
            if plan2 is None:
                return plan
            plan.motion_plan_response.trajectory.joint_trajectory.points.extend(
                plan2.motion_plan_response.trajectory.joint_trajectory.points[1:])
        return plan

    def plan(self, move_group, goal, start_state, max_movement_time):
        """
        Generates a plan by calling the MoveIt! service.
        :param move_group: group to plan with
        :type: MoveitCommander
        :param goal: the goal to be reached
        :type: PoseStamped (eef pose)/[float] (joint state)
        :param start_state: the robot state from which the generated plan will start
        :type: RobotState, None for current state
        :return: plan
        :type: GetMotionPlanResponse or None if no plan found
        """
        request = GetMotionPlanRequest()
        if start_state is None:
            request.motion_plan_request.start_state.is_diff = True
        else:
            request.motion_plan_request.start_state = start_state
        request.motion_plan_request.allowed_planning_time = move_group.get_planning_time()
        request.motion_plan_request.group_name = move_group.get_name()
        request.motion_plan_request.num_planning_attempts = 1
        constraint = Constraints()
        constraint.name = "muh23"
        if type(goal) is PoseStamped:
            pose_goal = self.__make_position_goal(move_group.get_end_effector_link(), goal)
            constraint.position_constraints.append(pose_goal[0])
            constraint.orientation_constraints.append(pose_goal[1])
        else:
            joint_goal = self.__make_joint_state_goal(goal)
            constraint.joint_constraints.extend(joint_goal)
        request.motion_plan_request.goal_constraints.append(constraint)
        request.motion_plan_request.planner_id = ""
        plans = []
        for i in xrange(5):
            try:
                resp = self.__plan_service(request)
                planning_time = self.get_timing_for_path(resp).to_sec()
                rospy.logdebug("motion time " + str(planning_time))
                if planning_time < max_movement_time:
                    plans.append((resp, planning_time))
            except rospy.ServiceException as exc:
                rospy.logdebug("Service did not process request: " + str(exc))
                rospy.logdebug("probably couldn't find a plan.")
        if not plans:
            rospy.loginfo("no motion plan found")
            return None
        best_plan = min(plans, key=lambda (plan, time): time)
        worst_plan = max(plans, key=lambda (plan, time): time)
        rospy.loginfo("motion time difference: " + str(worst_plan[1] - best_plan[1]))
        return best_plan[0]

    def __make_joint_state_goal(self, goal):
        """
        Helper method to create a joint goal out of a joint state.
        :param goal:
        [float]
        :return: list of joint goal constraints
        :type: [JointConstraint]
        """
        joint_goals = []
        joint_names = []
        if len(goal) == 7:
            joint_names = self.get_arm_move_group().get_joints()
        elif len(goal) == 9:
            joint_names = self.get_arm_base_move_group().get_joints()
            joint_names = [name for name in joint_names if name != "base_joint"]
        if len(goal) != len(joint_names):
            rospy.logwarn("length of joints does not equal length of joint names")
        for i in xrange(len(goal)):
            joint_goal = JointConstraint()
            joint_goal.joint_name = joint_names[i]
            joint_goal.position = goal[i]
            joint_goal.tolerance_above = 0.0001
            joint_goal.tolerance_below = 0.0001
            joint_goal.weight = 1.0
            joint_goals.append(joint_goal)
        return joint_goals

    def __make_position_goal(self, eef_link, goal):
        """
        Helper method to create a pose goal out of a PoseStamped.
        :param eef_link: name of the eef link
        :type: str
        :param goal: eef goal position
        :type: PoseStamped
        :return: position and orientation constraints
        :type: (PositionConstraint, OrientationConstraint)
        """
        position_tolerance = 0.00001
        orientation_tolerance = 0.001
        position_goal = PositionConstraint()
        position_goal.header = goal.header
        position_goal.link_name = eef_link
        position_goal.target_point_offset = Vector3()
        primitive = SolidPrimitive()
        primitive.type = SolidPrimitive.SPHERE
        primitive.dimensions.append(position_tolerance)
        position_goal.constraint_region.primitives.append(primitive)
        p = Pose()
        p.position.x = goal.pose.position.x
        p.position.y = goal.pose.position.y
        p.position.z = goal.pose.position.z
        p.orientation.w = 1
        position_goal.constraint_region.primitive_poses.append(p)
        position_goal.weight = 1.0
        orientation_goal = OrientationConstraint()
        orientation_goal.header = goal.header
        orientation_goal.link_name = eef_link
        orientation_goal.absolute_x_axis_tolerance = orientation_tolerance
        orientation_goal.absolute_y_axis_tolerance = orientation_tolerance
        orientation_goal.absolute_z_axis_tolerance = orientation_tolerance
        orientation_goal.orientation = goal.pose.orientation
        orientation_goal.weight = 1.0
        return (position_goal, orientation_goal)

    def get_base_origin(self):
        """
        :return: The centre of the arm's base
        :type: PointStamped
        """
        current_pose = self.__base_group.get_current_joint_values()
        result = PointStamped()
        result.header.frame_id = "/odom_combined"
        result.point = Point(current_pose[0], current_pose[1], 0)
        return result

    def get_eef_position(self):
        """
        :return: The current pose of the end effector
        :type: PoseStamped
        """
        current_pose = self.__arm_group.get_current_pose()
        return current_pose

    def blow_up_objects(self, do_not_blow_up_list=(), blow_up_distance=0.015):
        """
        Increases the size of the collision objects in order to create more stable plans.
        :param do_not_blow_up_list: list of object names that should not be blown up
        :type: [str]
        :param blow_up_distance: value by which the objects will be blown up
        :type: float
        """
        if self.__collision_object_buffer:
            rospy.loginfo("Trying to blow up objects before they were blown back down")
        else:
            self.__collision_object_buffer = self.__planning_scene_interface.get_collision_objects()
        # blow up the objects
        if do_not_blow_up_list is not None and "all" not in do_not_blow_up_list:
            for each in self.__collision_object_buffer:
                if each.id in do_not_blow_up_list:
                    continue
                if each.id not in self.__planning_scene_interface.safe_objects:
                    if each.id == "map":
                        bobj = self.__blow_up_map(each)
                    else:
                        bobj = self.__blow_up_object(each, blow_up_distance)
                    self.__planning_scene_interface.add_object(bobj)
            rospy.sleep(1.5)

    def blow_down_objects(self):
        """
        Reverts the collision objects back to normal.
        """
        if self.__collision_object_buffer:
            self.__planning_scene_interface.add_objects(self.__collision_object_buffer)
            self.__collision_object_buffer = []

    def __blow_up_object(self, bobject, factor):
        """
        :param bobject: Object to blow up
        :type: CollisionObject
        :param factor: blow-up factor
        :type: float
        :return: the blown-up object
        :type: CollisionObject
        """
        o = deepcopy(bobject)
        for primitive in o.primitives:
            dims = []
            for dimension in primitive.dimensions:
                dims.append(dimension + factor)
            primitive.dimensions = dims
        return o

    def __blow_up_map(self, object):
        """
        Special treatment for the map.
        :param object: map
        :type: CollisionObject
        :return: bigger map
        :type: CollisionObject
        """
        o = deepcopy(object)
        for primitive in o.primitives:
            dim = []
            dim.append(primitive.dimensions[0] + 0.005)
            dim.append(primitive.dimensions[1] + 0.005)
            dim.append(primitive.dimensions[2] + 0.175)
            primitive.dimensions = dim
        return o

    def get_end_state(self, plan_response):
        """
        Extracts the end state out of a plan.
        :param plan_response: plan
        :type: GetMotionPlanResponse
        :return: end state of the plan
        :type: RobotState
        """
        r = plan_response
        robot_state = RobotState()
        robot_state.multi_dof_joint_state = r.motion_plan_response.trajectory_start.multi_dof_joint_state
        robot_state.joint_state.header = r.motion_plan_response.trajectory.joint_trajectory.header
        robot_state.joint_state.name = r.motion_plan_response.trajectory.joint_trajectory.joint_names
        robot_state.joint_state.position = r.motion_plan_response.trajectory.joint_trajectory.points[-1].positions
        robot_state.joint_state.velocity = r.motion_plan_response.trajectory.joint_trajectory.points[-1].velocities
        robot_state.joint_state.effort = r.motion_plan_response.trajectory.joint_trajectory.points[-1].effort
        robot_state.attached_collision_objects = r.motion_plan_response.trajectory_start.attached_collision_objects
        return robot_state

    def get_current_joint_state(self):
        """
        :return: current joint state of the arm_base group.
        :type: [float]
        """
        return self.__arm_base_group.get_current_joint_values()

    def get_current_gripper_state(self):
        """
        :return: current joint state
        :type: [float]
        """
        return self.__gripper_group.get_current_joint_values()

    def get_current_lwr_joint_state(self):
        """
        :return: current joint state of the arm group.
        :type: [float]
        """
        return self.__arm_group.get_current_joint_values()

    def open_gripper(self, position=gripper_max_pose):
        """
        Opens the gripper and detaches any attached collision object.
        :param position: the desired finger position, max value if not specified.
        :type: float
        :return: success of the movement
        :type: bool
        """
        done = False
        while not done:
            try:
                self.__gripper_group.set_joint_value_target([-position, position])
                done = True
            except MoveItCommanderException:
                rospy.logdebug("Gripper failed to open")
                return False
        path = self.__gripper_group.plan()
        if self.__manService.move(path):
            self.__gripper_group.detach_object()
            self.load_object(0, Vector3(0, 0, 0))
            rospy.logdebug("Gripper opened")
            return True
        else:
            rospy.logdebug("Gripper failed to open")
            return False

    def close_gripper(self, object=None, grasp_point=None):
        """
        Closes the gripper completely or far enough to hold the object, when one is given.
        :param object: Object that will be grasped.
        :type: CollisionObject
        :return: success of the movement
        :type: bool
        """
        rospy.logdebug("Closing Gripper")
        if type(object) is CollisionObject:
            self.__gripper_group.attach_object(object.id, "gp", ["gp", "finger1", "finger2"])
            rospy.sleep(1.0)
            id = get_grasped_part(object, grasp_point)[1]
            # id = min(range(len(object.primitives)), key=lambda i: min(object.primitives[i].dimensions))
            # TODO: only works for cubes and cylinders and only "sometimes" for object compositions
            if object.primitives[id].type == shape_msgs.msg.SolidPrimitive.BOX:
                length = min(object.primitives[id].dimensions)
                self.__gripper_group.set_joint_value_target([-(length / 2), length / 2])
            elif object.primitives[id].type == shape_msgs.msg.SolidPrimitive.CYLINDER:
                radius = object.primitives[id].dimensions[shape_msgs.msg.SolidPrimitive.CYLINDER_RADIUS]
                if radius >= gripper_max_pose:
                    rospy.logdebug("Object is too big!")
                    return False
                self.__gripper_group.set_joint_value_target([-radius + 0.005, radius - 0.005])
        else:
            self.__gripper_group.set_joint_value_target([0.0, 0.0])
        path = self.__gripper_group.plan()
        return self.__manService.move(path)

    def grasp(self, collision_object, object_density=1):
        """
        Deprecated. For testing only.
        """
        return self.__grasp_with_group(collision_object, self.__arm_group, object_density)

    def grasp_and_move(self, collision_object, object_density=1):
        """
        Deprecated. For testing only.
        """
        return self.__grasp_with_group(collision_object, self.__arm_base_group, object_density)

    def __grasp_with_group(self, collision_object_name, move_group, object_density):
        """
        Deprecated.
        For testing only.
        """
        if type(collision_object_name) is CollisionObject:
            collision_object_name = collision_object_name.id
        collision_object = self.__planning_scene_interface.get_collision_object(collision_object_name)
        if collision_object is None:
            rospy.logwarn("Collision Object " + collision_object_name + " is not in the planning scene.")
            return False
        grasp_positions = calculate_grasp_position(collision_object, self.tf.transform_to)
        grasp_positions = self.filter_low_poses(grasp_positions)
        grasp_positions = self.filter_close_poses(grasp_positions)
        if len(grasp_positions) == 0:
            rospy.logwarn("No grasp positions found for " + collision_object_name)
        grasp_positions.sort(cmp=lambda x, y: self.cmp_pose_stamped(collision_object, x, y))
        visualize_poses(grasp_positions)
        # print grasp_positions
        self.open_gripper()
        for grasp in grasp_positions:
            if self.__move_group_to(get_pre_grasp(self.transform_to(grasp)), move_group,
                                    do_not_blow_up_list=("map", collision_object_name)):
                if not self.__move_group_to(grasp, move_group,
                                            do_not_blow_up_list=("map", collision_object_name)):
                    continue
                rospy.sleep(1)
                self.close_gripper(collision_object, get_fingertip(self.transform_to(grasp)))
                # com = self.get_center_of_mass(collision_object)
                # com = self.tf.transform_to(com, "/tcp")
                # if com is None:
                #     rospy.logwarn("TF failed")
                #     return False
                # self.load_object(self.calc_object_weight(collision_object, object_density),
                #                  Vector3(com.point.x, com.point.y, com.point.z))
                rospy.loginfo("grasped " + collision_object_name)
                # self.__grasp = self.tf.transform_to(grasp)
                # v1 = deepcopy(self.__grasp.pose.position)
                # v1.z = 0
                # v2 = deepcopy(collision_object.primitive_poses[0].position)
                # v2.z = 0
                # a = magnitude(subtract_point(v1, v2))
                # b = abs(self.__grasp.pose.position.z - collision_object.primitive_poses[0].position.z)
                # c = sqrt(a ** 2 + b ** 2)
                # self.__d = abs(c)
                # print c
                rospy.logdebug("lift object")
                if not self.__move_group_to(get_pre_grasp(grasp), move_group,
                                            do_not_blow_up_list=("map", collision_object_name)):
                    rospy.logdebug("couldn't lift object")
                return True
        rospy.logwarn("Grasping failed.")
        return False

    def cmp_pose_stamped(self, collision_object, pose1, pose2):
        """
        Compares two poses by calculating the distance to the centre of a collision
        object and returns -1/0/1 depending on which one is closer.
        :param collision_object: collision object
        :type: CollisionObject
        :param pose1: first pose
        :type: PoseStamped
        :param pose2: second pose
        :type: PoseStamped
        :return: pose1 > pose2
        :type: int
        """
        center = self.get_center_of_mass(collision_object)
        odom_pose1 = self.tf.transform_to(pose1)
        p1 = get_fingertip(odom_pose1)
        odom_pose2 = self.tf.transform_to(pose2)
        p2 = get_fingertip(odom_pose2)
        d1 = euclidean_distance(center.point, p1.point)
        d2 = euclidean_distance(center.point, p2.point)
        # if the object isn't the handle, put the side grasps first
        if len(collision_object.primitives) == 1:
            z1 = odom_pose1.pose.position.z
            z2 = odom_pose2.pose.position.z
            diff = z2 - z1
            return -1 if diff > 0 else 1 if diff < 0 else 0
        diff = d1 - d2
        # if it is the handle, try to grasp from above first
        if 0.0 <= abs(diff) <= 0.015:
            z1 = odom_pose1.pose.position.z
            z2 = odom_pose2.pose.position.z
            diff = z2 - z1
        return 1 if diff > 0 else -1 if diff < 0 else 0

    def filter_low_poses(self, list_of_poses, min_grasp_height=0.1):
        """
        Filters out positions that are very close to the ground.
def filter_low_poses(self, list_of_poses, min_grasp_height=0.1):
    """
    Filters out positions that are very close to the ground.
    :param list_of_poses: list of poses in odom_combined
    :type: [PoseStamped]
    :return: filtered list of PoseStamped
    :type: [PoseStamped]
    """
    return [pose for pose in list_of_poses
            if self.tf.transform_to(pose).pose.position.z > min_grasp_height]

def filter_close_poses(self, list_of_poses):
    """
    Filters out positions that are very close to the robot's base.
    :param list_of_poses: list of poses in odom_combined
    :type: [PoseStamped]
    :return: filtered list of PoseStamped
    :type: [PoseStamped]
    """
    base = self.get_base_origin()
    return [pose for pose in list_of_poses
            if euclidean_distance_in_2d(base.point, self.tf.transform_to(pose).pose.position) > 0.35]

def calc_object_weight(self, collision_object, density):
    """
    Calculates the weight of a collision object from its volume and the
    given density.
    :param collision_object: CollisionObject
    :type: CollisionObject
    :param density: density
    :type: float
    :return: weight
    :type: float
    """
    return calc_object_volume(collision_object) * density

def get_center_of_mass(self, collision_object):
    """
    Calculates the centre of a collision object as the mean of its
    primitive poses.
    :param collision_object: CollisionObject
    :type: CollisionObject
    :return: centre of mass
    :type: PointStamped
    """
    p = PointStamped()
    p.header.frame_id = "/odom_combined"
    for pose in collision_object.primitive_poses:
        p.point = add_point(p.point, pose.position)
    p.point = multiply_point(1.0 / len(collision_object.primitive_poses), p.point)
    return p

def place(self, destination):
    """ Deprecated. For testing only """
    return self.__place_with_group(destination, self.__arm_group)

def place_and_move(self, destination):
    """ Deprecated. For testing only """
    return self.__place_with_group(destination, self.__arm_base_group)

def __place_with_group(self, destination, move_group):
    """ Deprecated. For testing only """
    dest = deepcopy(destination)
    # print dest
    co = self.__planning_scene_interface.get_attached_object()
    if co is None:
        return False
    co = co.object
    dest = self.tf.transform_to(dest)
    place_poses = get_place_position(co, dest, self.tf.transform_to, self.__d, self.__grasp)
    # visualize_poses(place_poses)
    for place_pose in place_poses:
        if not self.__move_group_to(get_pre_place_position(place_pose), move_group):
            rospy.logwarn("Can't reach pre-place position.")
            continue
        if not self.__move_group_to(place_pose, move_group):
            rospy.logwarn("Can't reach place position.")
            continue
        rospy.sleep(1)
        if not self.open_gripper():
            return False
        rospy.sleep(1)
        post_place_pose = self.tf.transform_to(place_pose, co.id)
        # post_place_pose.header.frame_id = "/tcp"
        # post_place_pose.pose.position = Point(0, 0, -post_place_length)
        if not self.__move_group_to(get_pre_grasp(post_place_pose), move_group):
            # the object has already been released, so count this as a success
            rospy.logwarn("Can't reach post-place position.")
            return True
        rospy.sleep(0.25)
        rospy.loginfo("placed " + co.id)
        return True
    return False
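
# Usage sketch for the place entry points above (hypothetical; assumes an
# object is currently attached to the gripper and `destination` is a
# PoseStamped over a free spot):
#
#   if not manipulation.place_and_move(destination):
#       rospy.logwarn("Placing failed, object still attached")
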
def load_object(self, mass, cog):
    """
    Tells euroc that an object has been grasped.
    :param mass: mass of the object
    :type: float
    :param cog: centre of mass
    :type: Vector3
    :return: service response
    :type: SetObjectLoadResponse
    """
    request = SetObjectLoadRequest()
    request.mass = mass
    request.center_of_gravity = cog
    resp = self.__set_object_load_srv(request)
    # print resp.error_message
    return resp

def get_planning_scene(self):
    """
    :return: planning scene interface
    :type: PlanningSceneInterface
    """
    return self.__planning_scene_interface

def turn_arm(self, joint_value, joint=0):
    """
    Sets the given arm joint (by default the first one, "link1") to joint_value.
    :param joint_value: radian, -2.96 to 2.96
    :type: float
    :param joint: index of the joint to set
    :type: int
    :return: success of the movement
    :type: bool
    """
    current_joint_values = self.__arm_group.get_current_joint_values()
    current_joint_values[joint] = joint_value
    self.__arm_group.set_joint_value_target(current_joint_values)
    path = self.__arm_group.plan()
    return self.__manService.move(path)

def get_arm_move_group(self):
    """
    :return: arm move group
    :type: MoveGroupCommander
    """
    return self.__arm_group

def get_arm_base_move_group(self):
    """
    :return: arm_base move group
    :type: MoveGroupCommander
    """
    return self.__arm_base_group

def pan_tilt(self, pan, tilt):
    """
    Moves the scene cam.
    :param pan: desired pan
    :type: float
    :param tilt: desired tilt
    :type: float
    :return: success of the movement
    :type: bool
    """
    return self.__manService.pan_tilt(pan, tilt)

def set_planning_time_arm(self, time):
    """
    Sets the planning time of the arm move group.
    :param time: planning time in sec
    :type: float
    """
    return self.__arm_group.set_planning_time(time)

def set_planning_time_arm_base(self, time):
    """
    Sets the planning time of the arm_base move group.
    :param time: planning time in sec
    :type: float
    """
    return self.__arm_base_group.set_planning_time(time)
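
# Usage sketch for the helpers above (hypothetical values; pan/tilt units are
# assumed to be radians):
#
#   manipulation.turn_arm(1.5)          # rotate the first arm joint to 1.5 rad
#   manipulation.pan_tilt(0.2, -0.3)    # move the scene cam
#   manipulation.set_planning_time_arm(10)
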
def direct_move(self, configuration):
    """
    Uses the euroc service directly to move into the desired configuration.
    No collision detection, but faster.
    :param configuration: desired configuration
    :type: [float]
    :return: success
    :type: bool
    """
    return self.__manService.direct_move(configuration)

def scan_conveyor_pose(self):
    """
    Calculates a pose above the middle of the conveyor belt that faces the
    drop point, from which the conveyor can be scanned.
    :return: scan pose
    :type: PoseStamped
    """
    # initialize the drop point
    dp = geometry_msgs.msg.PoseStamped()
    dp.pose.position.x = 0
    dp.pose.position.y = 0
    dp.pose.position.z = -0.4
    dp.header.frame_id = "/drop_point"
    dp.pose.orientation = geometry_msgs.msg.Quaternion(0.0, 0.0, 0.0, 1.0)
    rospy.logdebug('ScanConveyorPose: Transform DropPoint to odom')
    dp_odom = self.transform_to(dp)

    scan_conveyor_pose = geometry_msgs.msg.PoseStamped()
    scan_conveyor_pose.header.frame_id = "/mdl_middle"
    scan_conveyor_pose.pose.orientation = geometry_msgs.msg.Quaternion(0.0, 0.0, 0.0, 1.0)
    scan_conveyor_pose.pose.position.x = 0
    scan_conveyor_pose.pose.position.y = -0.2
    scan_conveyor_pose.pose.position.z = 0

    rospy.logdebug('ScanConveyorPose: Transform mdl_middle to odom')
    mdl_middle_odom = self.transform_to(scan_conveyor_pose)
    scan_conveyor_pose.pose.position.z = mdl_middle_odom.pose.position.z + 0.3

    rospy.logdebug('ScanConveyorPose: Transform scan_conveyor_pose to odom')
    scan_conveyor_pose = self.transform_to(scan_conveyor_pose)

    rospy.logdebug('ScanConveyorPose: Calculate quaternion')
    quaternion = three_points_to_quaternion(scan_conveyor_pose.pose.position, dp_odom.pose.position)
    scan_conveyor_pose.pose.orientation = quaternion
    rospy.logdebug(scan_conveyor_pose)
    return scan_conveyor_pose

def plan_to(self, pose):
    """
    Searches an ik solution that moves the lwr from its current joint state
    to the given pose.
    :param pose: desired pose
    :return: ik solution
    :type: Configuration
    """
    rospy.logdebug('PlanTo: Start planning ik')
    rospy.logdebug('PlanTo: ' + str(pose))
    service = rospy.ServiceProxy("/euroc_interface_node/search_ik_solution", SearchIkSolution)
    config = Configuration()
    # copy the current joint state into the request configuration
    joint_state = self.get_current_lwr_joint_state()
    config.q.extend(joint_state)
    resp = service(config, pose)
    if resp.error_message:
        raise PlanningException(resp.error_message)
    rospy.logdebug('PlanTo: Return ik')
    return resp.solution

def is_gripper_open(self):
    """
    :return: whether the gripper is open
    :type: bool
    """
    states = self.__gripper_group.get_current_joint_values()
    rospy.logdebug("Finger: " + str(states))
    return states[0] < -0.03

def set_movement_time(self, movement_time):
    """
    Sets the maximum amount of time a movement may take.
    :param movement_time: time in sec
    :type: float
    """
    manipulation_constants.max_motion_time = movement_time
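
# Minimal end-to-end sketch for scanning the conveyor (hypothetical; assumes
# this class is instantiated as `manipulation` inside a running ROS node and
# that the search_ik_solution service accepts the pose type returned here):
#
#   scan_pose = manipulation.scan_conveyor_pose()
#   try:
#       configuration = manipulation.plan_to(scan_pose.pose)
#       manipulation.direct_move(configuration)
#   except PlanningException as ex:
#       rospy.logwarn("No ik solution for scan pose: " + str(ex))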