def get_article_urls(query='', page=0):
    yielded = []
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query,
        '' if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')),
            text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded:
            continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))

def handle(self, sn, d):
    log.info("----------- handle, sn {}, date {} --------------".format(
        sn, self.datetime_index[sn]))
    if sn <= 5:
        log.info("skip first 5 for sma5 history of 5")
        return

    #-------------------------------------------------------------------------
    # example to access watch list and factor/bar
    #-------------------------------------------------------------------------
    W0 = self.watchlists.get(name="W0", until_pos=0)
    sma5 = d.get(name="sma5", tickers=W0)
    # log.info("sma5 : {}\n{}".format(type(sma5), sma5))

    #-------------------------------------------------------------------------
    # process watch list W0 -> W1
    #-------------------------------------------------------------------------
    # get watch list W0 tickers
    W0 = self.watchlists.get(name="W0", until_pos=0)
    sma5 = d.get(name="sma5", tickers=W0)
    bar_close = d.get(name="close", tickers=W0)
    bar_close_2 = d.get(name="close", tickers=W0, pos=-1)
    selected = (sma5 > bar_close) & (sma5 > bar_close_2)
    W1 = utils.filter(W0, selected)
    self.watchlists.set(name="W1", tickers=W1, pos=0)
    log.info("select into W1 : {}".format(W1))

    #-------------------------------------------------------------------------
    # process watch list W1 -> W2
    #-------------------------------------------------------------------------
    # get watch list W0 tickers
    W1 = self.watchlists.get(name="W0", until_pos=-2)
    sma5 = d.get(name="sma5", tickers=W1, pos=-1)
    bar_close = d.get(name="close", tickers=W1, pos=-1)
    selected = (sma5 > bar_close)
    W2 = utils.filter(W1, selected)
    self.watchlists.set(name="W2", tickers=W2, pos=0)
    log.info("select into W2 : {}".format(W2))

    #-------------------------------------------------------------------------
    # for debug only
    #-------------------------------------------------------------------------
    if sn == 15:
        log.info("DEBUG - watchlist : \n{}".format(self.watchlists))
    pass

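# handle() above narrows watch lists with utils.filter(tickers, selected),
# which is not shown here; it appears to apply a boolean selection to a list
# of tickers. A minimal sketch of that assumed behaviour (the name mask_filter
# and the alignment of `selected` with `tickers` are assumptions):
def mask_filter(tickers, selected):
    # keep only the tickers whose corresponding flag in `selected` is truthy
    return [ticker for ticker, keep in zip(tickers, selected) if keep]

# e.g. mask_filter(["AAA", "BBB", "CCC"], [True, False, True]) -> ["AAA", "CCC"]
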
def search_results():
    search_query = request.args.get("search")
    ranking = request.args.get("rank")
    countries = [
        request.args.get(f"country{i}") for i in range(1, 11)
        if request.args.get(f"country{i}") is not None
    ]
    if search_query:
        tweets, suggestions = filter(search_query, ranking, countries)
        countries = "&".join([
            f"country{i}=" + request.args.get(f"country{i}")
            for i in range(1, 11) if request.args.get(f"country{i}") is not None
        ])
        if len(suggestions) == 0:
            suggestions = []
        if len(tweets) == 0:
            return render_template('pages/search.html',
                                   search_query=search_query,
                                   error="No tweets found",
                                   tweets=None,
                                   suggestions=suggestions,
                                   countries=countries)
        return render_template('pages/search.html',
                               search_query=search_query,
                               error=None,
                               tweets=tweets,
                               suggestions=suggestions,
                               countries=countries)
    return render_template('pages/search.html')

def reconstruct(indx=39714):
    l, rp0 = dataset[indx]
    _, rp1 = dataset[indx - 1]
    _, rp2 = dataset[indx - 2]
    _, rp3 = dataset[indx - 3]
    _, rp4 = dataset[indx - 4]
    rp = np.concatenate((rp0, rp1, rp2, rp3, rp4))
    rp = filter(rp)
    r0_pc = rp0[:, 3:6]
    r_laser_np = pc_to_laser(rp[:, 3:6])
    # draw_dataset(l, rp)
    # draw_laser(l, r_laser_np)
    r_laser = torch.Tensor(r_laser_np).reshape(1, 1, -1).to(device)
    recon_cgan = net_cgan(r_laser).detach().cpu().numpy()[0]
    recon_cgan = np.clip(recon_cgan, 0, 5)[0]
    recon_vae, _, _ = net_vae(r_laser)
    recon_vae = recon_vae.detach().cpu().numpy()[0]
    recon_vae = np.clip(recon_vae, 0, 5)[0]
    r_t = torch.Tensor(rp0).to(device)
    r_t = torch.unsqueeze(r_t, dim=1)
    recon_transformer = net_transformer(r_t, None)
    recon_transformer = recon_transformer.detach().cpu().numpy()
    return l, r_laser_np, r0_pc, recon_cgan, recon_vae, recon_transformer

def load(self, d):
    for e in d:
        if e[0] == self.__name__:
            path = e[-1]  # checkpoint path is stored as the last element of the entry
            print('loading %s: %s' % (self.__name__, path))
            self.recurrent_module.load_state_dict(torch.load(path))
        if e[0] == 'encoder':
            path = e[-1]
            print('loading encoder: %s' % path)
            self.encoder.load_state_dict(
                utils.filter(torch.load(path), ['resnet_features', 'encoder']))
        if e[0] == 'decoder':
            path = e[-1]
            print('loading decoder: %s' % path)
            self.decoder.load_state_dict(
                utils.filter(torch.load(path), ['decoder', 'deconv']))

def filterTweet(tweet):
    tweet = tweet.split()
    filtered_tweet = []
    for word in tweet:
        w = utils.filter(word)
        if w is True:
            continue
        else:
            filtered_tweet.append(w)
    return filtered_tweet

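# filterTweet above and the other tweet-cleaning snippets in this file rely on
# a utils.filter(word) helper that returns True when a word should be dropped
# and a normalised token otherwise; the helper itself is not shown. A minimal
# sketch of that assumed contract, with purely illustrative stop-word and
# normalisation rules:
import re

STOPWORDS = {'de', 'het', 'een', 'en', 'rt'}  # illustrative only

def filter_word(word):
    token = re.sub(r'[^\w#@]', '', word.lower())  # lowercase, strip punctuation
    if not token or token in STOPWORDS or token.startswith('http'):
        return True  # signal "drop this word"
    return token
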
def main():
    # Load Twitter data
    if peregrine:
        tweets = utils.loadAllData('/data/s1774395/', filter=True, save=True)
    else:
        tweets = utils.loadAllData('/home/robert/data/2015123101/',
                                   filter=True, save=True)
    logger.debug("Number of tweets: %i", len(tweets))

    # get bag of words
    bow = list(utils.getBagOfwords(tweets))

    # get the features
    import numpy as np
    features = np.zeros((len(tweets), len(bow)))
    # for every tweet
    for i in range(0, len(tweets)):
        # for every word in a tweet
        for j in range(0, len(tweets[i])):
            # Stem data, normalize, remove duplicates and only use geo-enabled data
            word = utils.filter(tweets[i][j])
            tweets[i][j] = word
            if word is not True:
                # for every word in the bagOfWords
                for k in range(0, len(bow)):
                    # if the word is the same
                    if word == bow[k]:
                        # update the features vector
                        logger.info("Update vector")
                        features[i][k] = features[i][k] + 1
            # what happens if word is false?

    # #find the distribution of the BOW
    # f = open('/home/cluster/Dropbox/Master/resources/bow.txt', 'w')
    # distr = features.sum(axis=0, dtype='int')
    # logger.info("The distribution of the Bag of words vector is:%s", distr)
    # for d in distr:
    #     try:
    #         f.write(str(d))
    #     except TypeError:
    #         import traceback
    #         traceback.print_exc()
    #         print d
    # f.close()

    # Train for Topic detection using LDA
    import scipy.sparse
    import lda
    lda.LDA(tweets, scipy.sparse.coo_matrix(features), len(tweets), len(bow), bow)

def object_container_filter(obj, **kwargs):
    """ Create a list of filter functions for each argument """
    filters = []
    if 'name' in kwargs:
        filters.append(lambda x: re.match(kwargs.get('name'), x._name))
    if 'path' in kwargs:
        filters.append(lambda x: re.match(kwargs.get('path'), x._path()))
    if 'type' in kwargs:
        filters.append(lambda x: isinstance(x, kwargs.get('type')))
    if 'filters' in kwargs:
        filters += kwargs.get('filters')
    ret = []
    for sobj in utils.get_list(obj):
        if utils.filter(sobj, filters):  # each sub-object must pass all filters
            ret.append(sobj)
    return ret

def sign_inputs(self, inputs, outputs, privKey):
    s_inputs = []
    for i in range(len(inputs)):
        addr, v, p_hash, p_pos, p_scriptPubKey, _ = inputs[i]
        secexp = getSecretFromPrivateKey(privKey)
        private_key = \
            ecdsa.SigningKey.from_secret_exponent(secexp, curve=SECP256k1)
        public_key = private_key.get_verifying_key()
        pubkey = getPubKeyFromPrivateKey(privKey)
        tx = filter(self.raw_tx(inputs, outputs, for_sig=i))
        sig = private_key.sign_digest(Hash(tx.decode('hex')),
                                      sigencode=ecdsa.util.sigencode_der)
        assert public_key.verify_digest(sig, Hash(tx.decode('hex')),
                                        sigdecode=ecdsa.util.sigdecode_der)
        s_inputs.append(
            (addr, v, p_hash, p_pos, p_scriptPubKey, [(pubkey, sig)]))
    return s_inputs

def loadAllData(fdir, value=None, filter=False, save=False):
    import os
    import pickle
    import json
    files = os.listdir(fdir)
    try:
        sentences = pickle.load(open(fdir + "filtered_sentences.p", 'rb'))
        return sentences
    except Exception, err:
        tweets = []
        filtered_tweets = []
        for fname in files:
            logger.info("Reading file: %s", fname)
            if fname.endswith(".out"):
                for line in open(fdir + fname, 'r'):
                    tweet = json.loads(line)['text']
                    if value is None:
                        if filter is True:
                            tweet = tweet.split()
                            filtered_tweet = []
                            for word in tweet:
                                w = utils.filter(word)
                                logger.debug("w has the value %s: ", w)
                                if w is True:
                                    continue
                                else:
                                    filtered_tweet.append(w)
                            filtered_tweets.append(filtered_tweet)
                            logger.debug("Tweet read and filtered: %s", filtered_tweet)
                        else:
                            tweets.append(tweet)
                            logger.debug("Tweet read and not filtered: %s", tweet)
                    elif value in tweet:
                        tweets.append(tweet)

def extract_pieces(path, needs_filter, threshold, piece_size):
    pieces = []
    color_images = utils.getImages(path)
    if needs_filter:
        blured_images = utils.median_blur(color_images, 5)
        bws = utils.color_to_bw(blured_images, threshold)
    else:
        bws = utils.color_to_bw(color_images, threshold)
        bws = utils.filter(bws, 2)
    for bw, color_image in zip(bws, color_images):
        (_, cnts, _) = cv2.findContours(bw.copy(), cv2.RETR_LIST,
                                        cv2.CHAIN_APPROX_NONE)
        for contour in cnts:
            bordersize = 15
            x, y, w, h = cv2.boundingRect(contour)
            if w < piece_size or h < piece_size:
                continue
            new_bw = np.zeros((h + 2 * bordersize, w + 2 * bordersize),
                              dtype='uint8')
            contours_to_draw = []
            contours_to_draw.append(
                utils.translate_contour(contour, bordersize - x, bordersize - y))
            cv2.drawContours(new_bw, np.asarray(contours_to_draw), -1, 255, -1)
            w += bordersize * 2
            h += bordersize * 2
            x -= bordersize
            y -= bordersize
            mini_color = color_image[y:y + h, x:x + w]
            mini_bw = new_bw
            mini_color = mini_color.copy()
            mini_bw = mini_bw.copy()
            one_piece = piece.create_piece(mini_color, mini_bw, piece_size)
            pieces.append(one_piece)
    return pieces

def save_data():
    print("looking for an EEG stream...")
    streams = resolve_stream('type', 'EEG')
    # create a new inlet to read from the stream;
    # streams[0] selects the first stream found
    inlet = StreamInlet(streams[0], max_buflen=60)
    start_state = True
    # global start_state
    while True:
        sample, timestamp = inlet.pull_chunk(timeout=0.0, max_samples=15000)
        if sample:
            if start_state:
                print("waiting 5 s for the data to stabilize")
                time.sleep(5)
                start_state = False
            else:
                sample_list = sample_arr(sample)                 # raw chunk as list
                filter_list = filter(sample_list)                # filter
                down_sample = signal.resample(filter_list, 100)  # down-sample
                epoch_buffer.set_raw_data(filter_list)           # save filtered data
                epoch_buffer.set_data(down_sample.tolist())      # save down-sampled data
                # print(down_sample[0])
        time.sleep(1)

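# save_data() above passes the raw chunk through a filter(sample_list) step
# before resampling; that filter is not shown here. One plausible sketch is a
# Butterworth band-pass via scipy.signal -- the 250 Hz sampling rate and the
# 1-50 Hz band below are assumptions, not taken from the source.
import numpy as np
from scipy import signal

def bandpass_filter(samples, fs=250.0, low=1.0, high=50.0, order=4):
    # zero-phase band-pass over the sample axis of a (samples, channels) chunk
    sos = signal.butter(order, [low, high], btype='bandpass', fs=fs, output='sos')
    return signal.sosfiltfilt(sos, np.asarray(samples), axis=0)
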
def best_action(board, last_move, alpha, beta, player, level):
    """Core of the minimax search with alpha-beta pruning: find the best action.

    player=1 represents a max node, player=2 represents a min node.

    Args:
        board: 20*20 array representing the current state of the game
        last_move: tuple, the last move made on the board
        alpha: float
        beta: float
        player: 1 for the agent's turn, 2 for the opponent's turn
        level: current search depth

    Returns:
        best_move: tuple
        best_val: float
    """
    # step 1. collect the candidates: blank cells in the 8-neighbourhood of stones
    candidates = []
    my_stones, opp_stones = utils.update_stones(board)
    for stone in my_stones:
        candidates += utils.get_blank_neighbors(board, stone, 1)
    for stone in opp_stones:
        candidates += utils.get_blank_neighbors(board, stone, 1)
    candidates = list(set(candidates))  # remove duplicates

    # step 2. filter and roughly sort the candidates;
    # the player matters a lot: sort in order of my advantage
    interested_moves = utils.filter(board, last_move, player, candidates)
    # interested_moves = candidates
    if len(interested_moves) == 1:
        current_move = interested_moves[0]
        val = place_stone(board, current_move, alpha, beta, player, level)
        return current_move, val

    # step 3. perform alpha-beta pruning
    best_move = interested_moves[0]  # continue to play even when losing
    if player == 1:  # max policy
        max_val = float("-inf")
        for current_move in interested_moves:
            val = place_stone(board, current_move, alpha, beta, player, level + 1)
            # a killing move ends the search immediately
            if utils.win(board, last_move, current_move, player):
                best_move = current_move
                best_val = val
                return best_move, best_val
            if val > alpha:
                alpha = val
            if val > max_val:
                max_val = val
                best_move = current_move
            if beta <= alpha:
                break
        best_val = max_val
    elif player == 2:  # min policy
        min_val = float("inf")
        for current_move in interested_moves:
            val = place_stone(board, current_move, alpha, beta, player, level + 1)
            # a killing move ends the search immediately
            if utils.win(board, last_move, current_move, player):
                best_move = current_move
                best_val = val
                return best_move, best_val
            if val < beta:
                beta = val
            if val < min_val:
                min_val = val
                best_move = current_move
            if beta <= alpha:
                break
        best_val = min_val
    return best_move, best_val

def signed_tx(self, inputs, outputs, privKey):
    s_inputs = self.sign_inputs(inputs, outputs, privKey)
    tx = filter(self.raw_tx(s_inputs, outputs))
    return tx

def getUserReadableArticleList(user):
    return utils.filter(
        articleMod.getArticleList({}),
        lambda a: checkIfUserCanReadArticle(a, user),
    )

def _apply_filter(obj, filters):
    """ Return [obj] if obj passes all filter functions, else an empty list """
    if utils.filter(obj, filters):
        return [obj]
    else:
        return []

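# Both object_container_filter and _apply_filter above assume a
# utils.filter(obj, filters) helper that applies a list of predicate callables
# to a single object. A minimal sketch under that assumption (the name
# check_all is hypothetical):
def check_all(obj, filters):
    # the object passes only if every predicate in `filters` returns truthy
    return all(f(obj) for f in filters)

# usage sketch: keep objects whose _name starts with "cam"
# kept = [o for o in objects if check_all(o, [lambda x: x._name.startswith("cam")])]
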
def get_article(soup):
    attribs = {'class': 'field-item even', 'property': 'content:encoded'}
    return '\n\n'.join(
        filter(map(soup.find('div', attribs).find_all('p'), lambda x: x.text),
               is_part_of_article))

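# get_article_urls and get_article call filter and map with the iterable first
# and the callable second, so they rely on project-local helpers rather than
# the builtins. A minimal sketch of helpers with that argument order (these
# definitions are assumptions and deliberately shadow the builtin names to
# mirror the snippets):
def map(iterable, func):
    return [func(item) for item in iterable]

def filter(iterable, predicate):
    return [item for item in iterable if predicate(item)]
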
# create documents:
# list of tuples with (cat and list of words):
# [([bad, person], neg), ([good, person], pos)]
documents = []
with open('./data/train_r6_test4.csv', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:
        # one doc
        content = row[0]
        topicLabel = row[1]
        topicName = row[2]
        words = []
        wordsInRow = content.split()
        for w in wordsInRow:
            w = filter(w)
            if w not in vocablSet:
                continue
            words.append(w)
        if len(words) < 1:
            continue
        tup = (words, topicName)
        documents.append(tup)

random.seed(4)
random.shuffle(documents)
# print(documents)

featuresets = [(document_count_features(d, vocab), c) for (d, c) in documents]
train_set, test_set = featuresets[1000:], featuresets[:1000]
classifier = nltk.NaiveBayesClassifier.train(train_set)

import json
import time

import utils

if __name__ == "__main__":
    with open('./config.json') as j:
        res = json.load(j)
    while True:
        for loja in res:
            utils.filter(loja["titulo"], loja["urls"])

def main():
    fname = '/home/cluster/Dropbox/Master/resources/20151201:00.out'
    # Load Twitter data (only load geolocation)
    data, geolocations = utils.loadData(fname, True)
    logger.info("Number of tweets: %i", len(data))
    bow = list(utils.getBagOfwords(data))

    import numpy as np
    features = np.zeros((len(data), len(bow)))
    # for every tweet
    for i in range(0, len(data)):
        # for every word in a tweet
        for j in range(0, len(data[i])):
            # Stem data, normalize, remove duplicates and only use geo-enabled data
            word = utils.filter(data[i][j])
            data[i][j] = word
            if word is not True:
                # for every word in the bagOfWords
                for k in range(0, len(bow)):
                    # if the word is the same
                    if word == bow[k]:
                        # update the features vector
                        logger.info("Update vector")
                        features[i][k] = features[i][k] + 1
            # what happens if word is false?

    # find the distribution of the BOW
    f = open('/home/cluster/Dropbox/Master/resources/bow.txt', 'w')
    distr = features.sum(axis=0, dtype='int')
    logger.info("The distribution of the Bag of words vector is:%s", distr)
    for d in distr:
        try:
            f.write(str(d))
        except TypeError:
            import traceback
            traceback.print_exc()
            print d
    f.close()

    logger.info("The length of the BOW vector is: %s", len(bow))
    logger.info("The length of the data is: %s", len(data))

    # Topic detection using LDA
    import scipy.sparse
    import lda
    lda.LDA(data, scipy.sparse.coo_matrix(features), len(data), len(bow), bow)

    # Topic detection using Word2vec (for disambiguation)
    # w2v = word2vecReader.Word2Vec()
    # wname = '/home/cluster/Dropbox/Master/Data/word2vec_twitter.bin'
    # model = w2v.load_word2vec_format(wname, binary=True)
    # model = gensim.models.Word2Vec(data, size=100, window=5, min_count=5, workers=4)
    # tweet = 'hoverboards verboden in nederland'
    # tweet = utils.checkWords(tweet, model)
    # w2vWords = model.most_similar(positive=tweet)
    # logger.info("The most similar words for the tweet are: %s", w2vWords)
    # tags = utils.getTags(w2vWords)
    # logger.info("tags: %s", tags)
    # logger.info("The most similar hashtags for the tweet are: %s", model.most_similar(positive=tags))
    # w2vHashtags = ['#']
    # w2vHashtags = utils.checkWords("".join(w2vHashtags), model)
    # logger.info("The most similar hashtags for the tweet are: %s", model.most_similar(positive=w2vHashtags))

    logger.info("The coordinates for the geolocations are: %s", geolocations)

def test_filter(self):
    self.assertEqual(len(ren.filter('publish', False, index)), 1)
    self.assertEqual(len(ren.filter('format', 'md', index)), 2)

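# The unit test above only pins down the behaviour of ren.filter(key, value,
# index): it returns the index entries whose metadata field `key` equals
# `value`. A minimal sketch consistent with that test, assuming `index` is a
# list of dicts (an assumption, not taken from the source):
def filter_index(key, value, index):
    return [entry for entry in index if entry.get(key) == value]

# e.g. filter_index('format', 'md', [{'format': 'md'}, {'format': 'rst'}])
# -> [{'format': 'md'}]
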
indx = 2872
# dataset = RadarDatasetClass(scene='parking')
# indx = 14547
# dataset = RadarDatasetClass(scene='outdoor')
# indx = 6893

l, rp0 = dataset[indx]
_, rp1 = dataset[indx - 1]
_, rp2 = dataset[indx - 2]
_, rp3 = dataset[indx - 3]
_, rp4 = dataset[indx - 4]
rp = np.concatenate((rp0, rp1, rp2, rp3, rp4))
rp = filter(rp)
r_laser_np = pc_to_laser(rp[:, 3:6])

lasers = [l, r_laser_np]
name = ['radar', 'lidar', 'human command']
show = True
range_limit = 4.9
ax = []
fig = plt.figure(figsize=(8, 8))
for i, l in enumerate(reversed(lasers)):
    angle = 120
    xp = []
    yp = []