def get_article_urls(query='', page=0):
    yielded = []
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query, ''
        if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')), text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded: continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))
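This first example relies on project-local helpers rather than the builtins: `filter` and `map` here take the iterable first and the callable second, and `open_soup`, `has_key`, `get_key`, and `text_match` are small factories. A minimal sketch of what such helpers could look like (assumed for illustration, not the original implementations):

import re
import requests
from bs4 import BeautifulSoup

# Assumed iterable-first variants of filter/map, matching how the example calls them.
def filter(iterable, func):
    return [x for x in iterable if func(x)]

def map(iterable, func):
    return [func(x) for x in iterable]

def open_soup(url):
    # Fetch a page and parse it with BeautifulSoup.
    return BeautifulSoup(requests.get(url).text, 'html.parser')

def has_key(key):
    # Predicate factory: does the tag carry this attribute?
    return lambda tag: tag.has_attr(key)

def get_key(key):
    # Extractor factory: read an attribute from a tag.
    return lambda tag: tag.get(key)

def text_match(pattern):
    # Predicate factory: does the string match this regex?
    return lambda text: re.match(pattern, text) is not None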
Example #2
    def handle(self, sn, d):
        log.info("----------- handle, sn {}, date {} --------------".format(
            sn, self.datetime_index[sn]))

        if sn <= 5:
            log.info("skip first 5 for sma5 history of 5")
            return

        #-------------------------------------------------------------------------
        # example to access watch list and factor/bar
        #-------------------------------------------------------------------------
        W0 = self.watchlists.get(name="W0", until_pos=0)
        sma5 = d.get(name="sma5", tickers=W0)
        # log.info("sma5 : {}\n{}".format(type(sma5), sma5))

        #-------------------------------------------------------------------------
        # process watch list W0 -> W1
        #-------------------------------------------------------------------------
        # get watch list W0 tickers
        W0 = self.watchlists.get(name="W0", until_pos=0)

        sma5 = d.get(name="sma5", tickers=W0)
        bar_close = d.get(name="close", tickers=W0)
        bar_close_2 = d.get(name="close", tickers=W0, pos=-1)

        selected = (sma5 > bar_close) & (sma5 > bar_close_2)
        W1 = utils.filter(W0, selected)

        self.watchlists.set(name="W1", tickers=W1, pos=0)
        log.info("select into W1 : {}".format(W1))

        #-------------------------------------------------------------------------
        # process watch list W1 -> W2
        #-------------------------------------------------------------------------
        # get watch list W0 tickers
        W1 = self.watchlists.get(name="W0", until_pos=-2)

        sma5 = d.get(name="sma5", tickers=W1, pos=-1)
        bar_close = d.get(name="close", tickers=W1, pos=-1)

        selected = (sma5 > bar_close)
        W2 = utils.filter(W1, selected)

        self.watchlists.set(name="W2", tickers=W2, pos=0)
        log.info("select into W2 : {}".format(W2))

        #-------------------------------------------------------------------------
        # for debug only
        #-------------------------------------------------------------------------
        if sn == 15:
            log.info("DEBUG - watchlist : \n{}".format(self.watchlists))
        pass
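The `utils.filter(W0, selected)` calls above keep only the tickers whose entry in the boolean mask `selected` is True. A minimal sketch of such a mask-based filter, assuming the watch list is a plain list of tickers and the mask is a pandas Series indexed by ticker (the framework's real `utils.filter` may differ):

import pandas as pd

def filter(tickers, selected):
    # Assumed behaviour: keep tickers whose boolean entry in the mask is True.
    return [t for t in tickers if bool(selected.get(t, False))]

# usage sketch with made-up tickers
mask = pd.Series({"AAA": True, "BBB": False, "CCC": True})
print(filter(["AAA", "BBB", "CCC"], mask))  # ['AAA', 'CCC']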
Example #3
def search_results():
    search_query = request.args.get("search")
    ranking = request.args.get("rank")
    countries = [
        request.args.get(f"country{i}") for i in range(1, 11)
        if request.args.get(f"country{i}") is not None
    ]
    if search_query:
        tweets, suggestions = filter(search_query, ranking, countries)
        countries = "&".join([
            f"country{i}=" + request.args.get(f"country{i}")
            for i in range(1, 11) if request.args.get(f"country{i}") is not None
        ])

        if len(suggestions) == 0:
            suggestions = []
        if len(tweets) == 0:
            return render_template('pages/search.html',
                                   search_query=search_query,
                                   error="No tweets Found",
                                   tweets=None,
                                   suggestions=suggestions,
                                   countries=countries)
        return render_template('pages/search.html',
                               search_query=search_query,
                               error=None,
                               tweets=tweets,
                               suggestions=suggestions,
                               countries=countries)

    return render_template('pages/search.html')
Example #4
def reconstruct(indx=39714):
    l, rp0 = dataset[indx]
    _, rp1 = dataset[indx-1]
    _, rp2 = dataset[indx-2]
    _, rp3 = dataset[indx-3]
    _, rp4 = dataset[indx-4]

    rp = np.concatenate((rp0, rp1, rp2, rp3, rp4))
    rp = filter(rp)

    r0_pc = rp0[:, 3:6]
    r_laser_np = pc_to_laser(rp[:, 3:6])
    # draw_dataset(l, rp)
    # draw_laser(l, r_laser_np)

    r_laser = torch.Tensor(r_laser_np).reshape(1, 1, -1).to(device)

    recon_cgan = net_cgan(r_laser).detach().cpu().numpy()[0]
    recon_cgan = np.clip(recon_cgan, 0, 5)[0]
    recon_vae, _, _ = net_vae(r_laser)
    recon_vae = recon_vae.detach().cpu().numpy()[0]
    recon_vae = np.clip(recon_vae, 0, 5)[0]

    r_t = torch.Tensor(rp0).to(device)
    r_t = torch.unsqueeze(r_t, dim=1)
    recon_transformer = net_transformer(r_t, None)
    recon_transformer = recon_transformer.detach().cpu().numpy()

    return l, r_laser_np, r0_pc, recon_cgan, recon_vae, recon_transformer
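The bare `filter(rp)` above is not shown; given that the columns `rp[:, 3:6]` are treated as point coordinates and the plotting script at the end of this page caps ranges at about 5 m, one plausible reading is a range cut on the accumulated point cloud. A minimal sketch under that assumption (threshold made up):

import numpy as np

def filter(points, max_range=5.0):
    # Assumed: keep rows whose XYZ coordinates (columns 3:6 here) lie within max_range metres.
    xyz = points[:, 3:6]
    return points[np.linalg.norm(xyz, axis=1) < max_range]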
Example #5
 def load(self, d):
     for e in d:
         if e[0] == self.__name__:
             path = d[0][-1]
             print('loading %s: %s' % (self.__name__, path))
             self.recurrent_module.load_state_dict(torch.load(path))
         if e[0] == 'encoder':
             path = d[0][-1]
             print('loading encoder: %s' % path)
             self.encoder.load_state_dict(
                 utils.filter(torch.load(path),
                              ['resnet_features', 'encoder']))
         if e[0] == 'decoder':
             path = d[0][-1]
             print('loading decoder: %s' % path)
             self.decoder.load_state_dict(
                 utils.filter(torch.load(path), ['decoder', 'deconv']))
 def load(self, d):
     for e in d:
         if e[0] == self.__name__:
             path = d[0][-1]
             print("loading %s: %s" % (self.__name__, path))
             self.recurrent_module.load_state_dict(torch.load(path))
         if e[0] == "encoder":
             path = d[0][-1]
             print("loading encoder: %s" % path)
             self.encoder.load_state_dict(
                 utils.filter(torch.load(path),
                              ["resnet_features", "encoder"]))
         if e[0] == "decoder":
             path = d[0][-1]
             print("loading decoder: %s" % path)
             self.decoder.load_state_dict(
                 utils.filter(torch.load(path), ["decoder", "deconv"]))
Example #7
def filterTweet(tweet):
    tweet = tweet.split()
    filtered_tweet = []
    for word in tweet:
        w = utils.filter(word)
        if w is True:
            continue
        else:
            filtered_tweet.append(w)
    return filtered_tweet
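In this example `utils.filter(word)` evidently returns `True` when a word should be discarded and otherwise returns the (possibly cleaned) word; the same contract is assumed in the tweet-loading examples below. A minimal sketch of such a helper, with the stop-word list and cleaning rules invented for illustration:

STOP_WORDS = {"the", "a", "an", "rt"}  # assumed example list, not the project's

def filter(word):
    # Assumed contract: return True to drop the word, otherwise return the cleaned word.
    w = word.lower().strip(".,!?:;\"'")
    if not w or w in STOP_WORDS or w.startswith(("http", "@", "#")):
        return True
    return w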
Example #8
def main():
    #Load Twitter data
    if peregrine:
        tweets = utils.loadAllData('/data/s1774395/', filter=True, save=True)
    else:
        tweets = utils.loadAllData('/home/robert/data/2015123101/',
                                   filter=True,
                                   save=True)

    logger.debug("Number of tweets: %i", len(tweets))

    #get bag of words
    bow = list(utils.getBagOfwords(tweets))

    #get the features
    import numpy as np
    features = np.zeros((len(tweets), len(bow)))
    #for every tweet
    for i in range(0, len(tweets)):
        #for every word in a tweet
        for j in range(0, len(tweets[i])):
            #Stem data, normalize, remove duplicates and only use geo-enabled data
            word = utils.filter(tweets[i][j])
            tweets[i][j] = word
            if word is not True:
                #for every word in the bagOfWords
                for k in range(0, len(bow)):
                    #if the word is the same
                    if word == bow[k]:
                        #update the features vector
                        logger.info("Update vector")
                        features[i][k] = features[i][k] + 1
        #what happens if word is false?

    # #find the distribution of the BOW
    # f = open('/home/cluster/Dropbox/Master/resources/bow.txt', 'w')
    # distr = features.sum(axis=0, dtype='int')
    # logger.info("The distribution of the Bag of words vector is:%s", distr)
    # for d in distr:
    # 	try:
    # 		f.write(str(d))
    # 	except TypeError:
    # 		import traceback
    # 		traceback.print_exc()
    # 		print d
    # f.close()

    #Train for topic detection using LDA
    import lda
    import scipy.sparse
    lda.LDA(tweets, scipy.sparse.coo_matrix(features), len(tweets), len(bow), bow)
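The inner loop above rescans the entire bag of words for every token, which is O(len(bow)) per word; building a word-to-column index once gives the same counts. A small self-contained sketch of that idea (the tweet data is made up for illustration):

import numpy as np

tweets = [["good", "morning", "world"], ["good", "good", "night"]]  # made-up data
bow = sorted({w for t in tweets for w in t})
index = {w: k for k, w in enumerate(bow)}  # word -> column, built once

features = np.zeros((len(tweets), len(bow)))
for i, tweet in enumerate(tweets):
    for word in tweet:
        k = index.get(word)
        if k is not None:
            features[i, k] += 1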
def object_container_filter(obj, **kwargs):
    """ Create a list of filter functions for each argument """
    filters = []
    if 'name' in kwargs:
        filters.append(lambda x: re.match(kwargs.get('name'), x._name))
    if 'path' in kwargs:
        filters.append(lambda x: re.match(kwargs.get('path'), x._path()))
    if 'type' in kwargs:
        filters.append(lambda x: isinstance(x, kwargs.get('type')))
    if 'filters' in kwargs:
        filters += kwargs.get('filters')
    ret = []
    for sobj in utils.get_list(obj):
        if utils.filter(sobj, filters):
            ret.append(sobj)

    return ret
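Both `object_container_filter` above and `_apply_filter` further down hand `utils.filter` an object plus a list of predicate functions, so the call only makes sense if it reports whether the object passes every predicate. A minimal sketch under that assumption:

def filter(obj, filters):
    # Assumed semantics: obj passes only if every predicate in the list returns True.
    return all(f(obj) for f in filters)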
Example #10
 def sign_inputs(self, inputs, outputs, privKey):
     s_inputs = []
     for i in range(len(inputs)):
         addr, v, p_hash, p_pos, p_scriptPubKey, _ = inputs[i]
         secexp = getSecretFromPrivateKey(privKey)
         private_key = \
             ecdsa.SigningKey.from_secret_exponent(secexp,
                                                   curve=SECP256k1)
         public_key = private_key.get_verifying_key()
         pubkey = getPubKeyFromPrivateKey(privKey)
         tx = filter(self.raw_tx(inputs, outputs, for_sig=i))
         sig = private_key.sign_digest(Hash(tx.decode('hex')),
                                       sigencode=ecdsa.util.sigencode_der)
         assert public_key.verify_digest(sig,
                                         Hash(tx.decode('hex')),
                                         sigdecode=ecdsa.util.sigdecode_der)
         s_inputs.append(
             (addr, v, p_hash, p_pos, p_scriptPubKey, [(pubkey, sig)]))
     return s_inputs
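Here `filter` is applied to the serialized transaction before `.decode('hex')`, so it presumably normalizes the serialization into a plain hex string. A minimal sketch under that assumption (the real helper may do more):

def filter(raw_tx):
    # Assumed: strip whitespace so the result can be hex-decoded and hashed.
    return ''.join(str(raw_tx).split())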
Example #11
def loadAllData(fdir, value=None, filter=False, save=False):
    import os
    import pickle
    import json
    files = os.listdir(fdir)

    try:
        sentences = pickle.load(open(fdir + "filtered_sentences.p", 'rb'))
        return sentences
    except Exception:
        tweets = []
        filtered_tweets = []
        for fname in files:
            logger.info("Reading file: %s", fname)
            if fname.endswith(".out"):
                for line in open(fdir + fname, 'r'):
                    tweet = json.loads(line)['text']
                    if value is None:
                        if filter is True:
                            tweet = tweet.split()
                            filtered_tweet = []
                            for word in tweet:
                                w = utils.filter(word)
                                logger.debug("w has the value %s: ", w)
                                if w is True:
                                    continue
                                else:
                                    filtered_tweet.append(w)
                            filtered_tweets.append(filtered_tweet)
                            logger.debug("Tweet read and filtered: %s",
                                         filtered_tweet)
                        else:
                            tweets.append(tweet)
                            logger.debug("Tweet read and not filtered: %s",
                                         tweet)
                    elif value in tweet:
                        tweets.append(tweet)
Example #12
def extract_pieces(path, needs_filter, threshold, piece_size):
    pieces = []
    color_images = utils.getImages(path)
    if needs_filter:
        blured_images = utils.median_blur(color_images, 5)
        bws = utils.color_to_bw(blured_images, threshold)
    else:
        bws = utils.color_to_bw(color_images, threshold)
        bws = utils.filter(bws, 2)

    for bw, color_image in zip(bws, color_images):
        (_, cnts, _) = cv2.findContours(bw.copy(), cv2.RETR_LIST,
                                        cv2.CHAIN_APPROX_NONE)
        for contour in cnts:
            bordersize = 15
            x, y, w, h = cv2.boundingRect(contour)
            if (w < piece_size or h < piece_size):
                continue
            new_bw = np.zeros((h + 2 * bordersize, w + 2 * bordersize),
                              dtype='uint8')
            contours_to_draw = []
            contours_to_draw.append(
                utils.translate_contour(contour, bordersize - x,
                                        bordersize - y))
            cv2.drawContours(new_bw, np.asarray(contours_to_draw), -1, 255, -1)
            w += bordersize * 2
            h += bordersize * 2
            x -= bordersize
            y -= bordersize
            mini_color = color_image[y:y + h, x:x + w]
            mini_bw = new_bw
            mini_color = mini_color.copy()
            mini_bw = mini_bw.copy()

            one_piece = piece.create_piece(mini_color, mini_bw, piece_size)
            pieces.append(one_piece)
    return pieces
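The three-value unpacking of `cv2.findContours` above matches OpenCV 3.x; OpenCV 4.x returns only `(contours, hierarchy)`. A small version-agnostic helper, added here for illustration:

import cv2

def find_contours_compat(bw):
    # Works with both OpenCV 3.x (3-tuple) and 4.x (2-tuple) return values.
    result = cv2.findContours(bw.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    return result[0] if len(result) == 2 else result[1]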
Example #13
def save_data():
    print("looking for an EEG stream...")
    streams = resolve_stream('type', 'EEG')
    # create a new inlet to read from the stream
    # here streams[0] selects the first stream found
    inlet = StreamInlet(streams[0], max_buflen=60)
    start_state = True
    # global start_state
    while True:
        sample, timestamp = inlet.pull_chunk(timeout=0.0, max_samples=15000)

        if sample:
            if start_state:
                print("please waiting for 5s  the data to stabilize")
                time.sleep(5)
                start_state = False
            else:
                sample_list = sample_arr(sample)  # list
                filter_list = filter(sample_list)  # filter
                down_sample = signal.resample(filter_list, 100)  # down sample
                epoch_buffer.set_raw_data(filter_list)  # save filter data
                epoch_buffer.set_data(down_sample.tolist())  # save down sample
                # print(down_sample[0])
            time.sleep(1)
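The `filter(sample_list)` step above is not shown; for raw EEG chunks it is typically a band-pass. A minimal sketch using scipy.signal, with the sampling rate and cut-off frequencies as assumptions rather than values from the original project:

import numpy as np
from scipy import signal

def filter(samples, fs=250.0, low=1.0, high=40.0):
    # Assumed band-pass over an EEG chunk shaped (samples, channels).
    b, a = signal.butter(4, [low / (fs / 2), high / (fs / 2)], btype="band")
    return signal.filtfilt(b, a, np.asarray(samples, dtype=float), axis=0)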
Example #14
 def sign_inputs(self, inputs, outputs, privKey):
     s_inputs = []
     for i in range(len(inputs)):
         addr, v, p_hash, p_pos, p_scriptPubKey, _ = inputs[i]
         secexp = getSecretFromPrivateKey(privKey)
         private_key = \
             ecdsa.SigningKey.from_secret_exponent(secexp,
                                                   curve=SECP256k1)
         public_key = private_key.get_verifying_key()
         pubkey = getPubKeyFromPrivateKey(privKey)
         tx = filter(self.raw_tx(inputs, outputs, for_sig=i))
         sig = private_key.sign_digest(Hash(tx.decode('hex')),
                                       sigencode=ecdsa.util.sigencode_der)
         assert public_key.verify_digest(sig,
                                         Hash(tx.decode('hex')),
                                         sigdecode=ecdsa.util.sigdecode_der)
         s_inputs.append((addr,
                          v,
                          p_hash,
                          p_pos,
                          p_scriptPubKey,
                          [(pubkey,
                          sig)]))
     return s_inputs
Example #15
def best_action(board, last_move, alpha, beta, player, level):
    """Important function for mini-max search with ab-pruning, searching for the best action.
        play=1,representing a max-node
        player=2,representing a min-node

                    Args:
                        board: 20*20 array,representing the current state of the game
                        last_move: tuple, last move conducted by 3-player
                        alpha: float
                        beta: float
                        player: 1 for agent's turn, 2 for opponent's turn
                        level: current searching depth
                    Returns:
                        best_move: tuple
                        best_val:int
    """
    # step 1. collect the candidate moves (blank 8-neighbours)
    candidates = []
    my_stones, opp_stones = utils.update_stones(board)
    for stone in my_stones:
        candidates += utils.get_blank_neighbors(board, stone, 1)
    for stone in opp_stones:
        candidates += utils.get_blank_neighbors(board, stone, 1)
    candidates = list(set(candidates))  # remove duplicates

    # step 2. filter and roughly sort the candidates
    # player matters a lot
    # sort in order of my advantage
    interested_moves = utils.filter(board, last_move, player, candidates)
    # interested_moves = candidates
    if len(interested_moves) == 1:
        current_move = interested_moves[0]
        val = place_stone(board, current_move, alpha, beta, player, level)
        return current_move, val

    # step3. perform ab-pruning
    best_move = interested_moves[0]  # continue to play even if I'm losing
    if player == 1:
        # max policy
        max_val = float("-inf")
        for current_move in interested_moves:
            val = place_stone(board, current_move, alpha, beta, player,
                              level + 1)
            # killing
            if utils.win(board, last_move, current_move, player):
                best_move = current_move
                best_val = val
                break
            if val > alpha:
                alpha = val
            if val > max_val:
                max_val = val
                best_move = current_move
            if beta <= alpha:
                break
        best_val = max_val
    elif player == 2:
        # min policy
        min_val = float("inf")
        for current_move in interested_moves:
            val = place_stone(board, current_move, alpha, beta, player,
                              level + 1)
            # killing
            if utils.win(board, last_move, current_move, player):
                best_move = current_move
                best_val = val
                break
            if val < beta:
                beta = val
            if val < min_val:
                min_val = val
                best_move = current_move
            if beta <= alpha:
                break
        best_val = min_val
    return best_move, best_val
Example #16
 def signed_tx(self, inputs, outputs, privKey):
     s_inputs = self.sign_inputs(inputs, outputs, privKey)
     tx = filter(self.raw_tx(s_inputs, outputs))
     return tx
Example #17
def getUserReadableArticleList (user):
    return utils.filter(
        articleMod.getArticleList({}),
        lambda a: checkIfUserCanReadArticle(a, user),
    );
def _apply_filter(obj, filters):
    """ Create a list of filter functions for each argument """
    if utils.filter(obj, filters):
        return [obj]
    else:
        return []
def get_article(soup):
    attribs = {'class': 'field-item even', 'property': 'content:encoded'}
    return '\n\n'.join(
        filter(map(soup.find('div', attribs).find_all('p'), lambda x: x.text),
               is_part_of_article))
# create documents:
# list of tuples with (cat and list of words):
# [([bad, person], neg), ([good, person], pos)]
documents = []
with open('./data/train_r6_test4.csv', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    for row in reader:  # one doc
        content = row[0]
        topicLabel = row[1]
        topicName = row[2]

        words = []
        wordsInRow = content.split()
        for w in wordsInRow:
            w = filter(w)
            if w not in vocablSet:
                continue
            words.append(w)
        if words.__len__() < 1:
            continue
        tup = (words, topicName)
        documents.append(tup)
        random.seed(4)
        random.shuffle(documents)

# print(documents)

featuresets = [(document_count_features(d, vocab), c) for (d, c) in documents]
train_set, test_set = featuresets[1000:], featuresets[:1000]
classifier = nltk.NaiveBayesClassifier.train(train_set)
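`document_count_features` is not defined in this snippet; given how it feeds `nltk.NaiveBayesClassifier.train`, it plausibly maps a document to per-word counts over a fixed vocabulary. A minimal sketch under that assumption (name kept, body invented):

def document_count_features(words, vocab):
    # Assumed: feature dict of word counts restricted to the vocabulary.
    counts = {w: 0 for w in vocab}
    for w in words:
        if w in counts:
            counts[w] += 1
    return counts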
Example #21
import json
import utils
import time

if __name__=="__main__":
    with open('./config.json') as j:
        res = json.load(j)
        while True:
            for loja in res:
                utils.filter(loja["titulo"], loja["urls"])
Example #22
def main():
    fname = '/home/cluster/Dropbox/Master/resources/20151201:00.out'

    #Load Twitter data (only load geolocation)
    data, geolocations = utils.loadData(fname, True)
    logger.info("Number of tweets: %i", len(data))

    bow = list(utils.getBagOfwords(data))

    import numpy as np
    features = np.zeros((len(data), len(bow)))
    #for every tweet
    for i in range(0, len(data)):
        #for every word in a tweet
        for j in range(0, len(data[i])):
            #Stem data, normalize, remove duplicates and only use geo-enabled data
            word = utils.filter(data[i][j])
            data[i][j] = word
            if word is not True:
                #for every word in the bagOfWords
                for k in range(0, len(bow)):
                    #if the word is the same
                    if word == bow[k]:
                        #update the features vector
                        logger.info("Update vector")
                        features[i][k] = features[i][k] + 1
        #what happens if word is false?

    #find the distribution of the BOW
    f = open('/home/cluster/Dropbox/Master/resources/bow.txt', 'w')
    distr = features.sum(axis=0, dtype='int')
    logger.info("The distribution of the Bag of words vector is:%s", distr)
    for d in distr:
        try:
            f.write(str(d))
        except TypeError:
            import traceback
            traceback.print_exc()
            print(d)
    f.close()

    logger.info("The length of the BOW vector is: %s", len(bow))
    logger.info("The length of the data is: %s", len(data))

    #Topic detection using LDA
    import lda
    import scipy.sparse
    lda.LDA(data, scipy.sparse.coo_matrix(features), len(data), len(bow), bow)

    #Topic detection using Word2vec (for disambiguation)
    #w2v = word2vecReader.Word2Vec()
    #wname = '/home/cluster/Dropbox/Master/Data/word2vec_twitter.bin'
    #model = w2v.load_word2vec_format(wname, binary=True)

    #model = gensim.models.Word2Vec(data, size=100, window=5, min_count=5, workers=4)

    #tweet = 'hoverboards verboden in nederland'
    #tweet = utils.checkWords(tweet, model)

    #w2vWords = model.most_similar(positive=tweet)
    #logger.info("The most similar words for the tweet are: %s", w2vWords)
    #tags = utils.getTags(w2vWords)
    #logger.info("tags: %s", tags)
    #logger.info("The most similar hashtags for the tweet are: %s", model.most_similar(positive=tags))

    #w2vHashtags = ['#']
    #w2vHashtags = utils.checkWords("".join(w2vHashtags), model)
    #logger.info("The most similar hashtags for the tweet are: %s", model.most_similar(positive=w2vHashtags))

    logger.info("The coordinates for the geolocations are: %s", geolocations)
Example #23
 def signed_tx(self, inputs, outputs, privKey):
     s_inputs = self.sign_inputs(inputs, outputs, privKey)
     tx = filter(self.raw_tx(s_inputs, outputs))
     return tx
Example #24
 def test_filter(self):
     self.assertEqual(len(ren.filter('publish', False, index)), 1)
     self.assertEqual(len(ren.filter('format', 'md', index)), 2)
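The assertions above suggest `ren.filter(key, value, index)` returns the entries of `index` whose metadata value for `key` equals `value`. A minimal sketch of that behaviour (assumed, not the project's implementation):

def filter(key, value, index):
    # Assumed semantics: keep index entries whose metadata `key` equals `value`.
    return [entry for entry in index if entry.get(key) == value]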
indx = 2872

# dataset = RadarDatasetClass(scene='parking')
# indx = 14547

# dataset = RadarDatasetClass(scene='outdoor')
# indx = 6893

l, rp0 = dataset[indx]
_, rp1 = dataset[indx - 1]
_, rp2 = dataset[indx - 2]
_, rp3 = dataset[indx - 3]
_, rp4 = dataset[indx - 4]

rp = np.concatenate((rp0, rp1, rp2, rp3, rp4))
rp = filter(rp)

r_laser_np = pc_to_laser(rp[:, 3:6])

lasers = [l, r_laser_np]
name = ['radar', 'lidar', 'human command']
show = True
range_limit = 4.9

ax = []

fig = plt.figure(figsize=(8, 8))
for i, l in enumerate(reversed(lasers)):
    angle = 120
    xp = []
    yp = []