Example #1
 def on_data(self, data):
     try:
         if self.count < self.limit:
             tweet = json.loads(data)
             if tweet['lang'] == 'en' and tweet['user'].get('location') is not None:
                 place = tweet['user'].get('location')
                 if place:
                     tweet_id = str(tweet['id'])
                     geocode_result = gmaps.geocode(place)
                     lat = geocode_result[0]['geometry']['location']['lat']
                     lng = geocode_result[0]['geometry']['location']['lng']
                     tweet_text = tweet['text'].lower().encode('ascii', 'ignore').decode('ascii')
                     raw_tweet = {
                         'user': tweet['user']['screen_name'],
                         'text': tweet_text,
                         'place': place,
                         'coordinates': {'location': str(lat)+","+str(lng)}, 
                         'time': tweet['created_at'],
                         'category': get_category(tweet_text)
                     }
                     es.index(index=ES_INDEX, doc_type=ES_TYPE, id=tweet_id, body=raw_tweet)
             self.count += 1
         else:
             stream.disconnect()
     except Exception:
         # Ignore malformed tweets and geocoding failures so the stream keeps running.
         pass
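Several of the tweet-classification examples on this page (#1, #11, #19, #21) call a get_category(text) helper that maps tweet text to an event label such as 'BUT' (goal) or 'rien'. A minimal keyword-based sketch, purely illustrative and not the original implementation:

# Hypothetical sketch of the get_category(text) helper assumed by the tweet examples.
# The keyword list is made up; only the 'BUT' and 'rien' labels appear in the examples themselves.
GOAL_KEYWORDS = ('but', 'goal', 'goooal')

def get_category(text):
    lowered = text.lower()
    if any(word in lowered for word in GOAL_KEYWORDS):
        return 'BUT'
    return 'rien'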
Example #2
def review_revision(request, revision_id):
    revision = TextbookCompanionRevision.objects.using('scilab').get(
        id=revision_id)
    file = utils.get_file(revision.example_file.filepath,
                          revision.commit_sha,
                          main_repo=False)
    code = base64.b64decode(file['content'])

    request.session['revision_id'] = revision_id

    example = revision.example_file.example
    chapter = example.chapter
    book = chapter.preference
    category = utils.get_category(book.category)

    data = {
        'code': code,
        'revision': model_to_dict(revision),
        'example': model_to_dict(example),
        'chapter': model_to_dict(chapter),
        'book': model_to_dict(book),
        'category': category,
        'createdAt': str(revision.timestamp),
    }
    return simplejson.dumps(data)
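Note that review_revision returns a JSON string rather than an HttpResponse. A hedged sketch of how a caller might wrap it in a standard Django response (the wrapper view below is an assumption, not part of the original project):

# Hypothetical wrapper view; names are illustrative only.
from django.http import HttpResponse

def review_revision_json(request, revision_id):
    payload = review_revision(request, revision_id)
    return HttpResponse(payload, content_type='application/json')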
Example #3
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(
        os.path.expanduser(os.path.expandvars(config.get('cache',
                                                         'category'))),
        os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = dict([(name, i) for i, name in enumerate(category)])
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
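The cache-building and model scripts on this page all rely on a make_args() helper that is not shown. A plausible argparse sketch, inferred only from the attributes they access (args.config, args.modify, args.logging, args.phase); the flags and defaults are assumptions:

import argparse

def make_args():
    # Inferred from attribute usage in the examples above; not the original helper.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', nargs='+', default=['config.ini'], help='config file(s)')
    parser.add_argument('-m', '--modify', nargs='+', default=[], help='config overrides')
    parser.add_argument('--logging', default='logging.yml', help='logging YAML config path')
    parser.add_argument('-p', '--phase', nargs='+', default=['train', 'val'], help='dataset phases to cache')
    return parser.parse_args()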
Example #4
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(os.path.expanduser(os.path.expandvars(config.get('cache', 'category'))), os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = dict([(name, i) for i, name in enumerate(category)])
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
Example #5
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(
        config, cache_dir if os.path.exists(cache_dir) else None)
    anchors = utils.get_anchors(config)
    anchors = torch.from_numpy(anchors).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(
        config, state_dict), anchors, len(category))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config,
                                       config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(
        config,
        config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(
        config,
        config.get('transform', 'tensor').split())
    # load image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # Checksum
    for key, var in dnn.state_dict().items():
        a = var.cpu().numpy()
        print('\t'.join(
            map(str, [
                key, a.shape,
                utils.abs_mean(a),
                hashlib.md5(a.tostring()).hexdigest()
            ])))
    output = dnn(torch.autograd.Variable(tensor, volatile=True)).data
    for key, a in [
        ('image_bgr', image_bgr),
        ('image_resized', image_resized),
        ('tensor', tensor.cpu().numpy()),
        ('output', output.cpu().numpy()),
    ]:
        print('\t'.join(
            map(str, [
                key, a.shape,
                utils.abs_mean(a),
                hashlib.md5(a.tostring()).hexdigest()
            ])))
Example #6
def process_cmd_comments():
    try:
        queue_item = QUEUE_COMMENTS.get_nowait()
    except queue.Empty:
        return

    comment: Comment = queue_item[0]
    cmd_str = comment["body"]
    logger.debug(cmd_str)
    parsed_cmd = parse_command(cmd_str)
    if parsed_cmd is None:
        logger.info("No command found in %s", comment["url"])
        QUEUE_COMMENTS.task_done()
        return
    if parsed_cmd["help"] is not None and comment["author"] != ACCOUNT:
        if not replied_to_comment(comment, ACCOUNT):
            if reply_message(comment, MESSAGES["HELP"], ACCOUNT):
                logger.info("Help message replied to %s", comment["url"])
            else:
                logger.info("Couldn't reply to %s", comment["url"])
        else:
            logger.info("Already replied with help command to %s",
                        comment["url"])
        QUEUE_COMMENTS.task_done()
        return
    if parsed_cmd["help"] is None and parsed_cmd.get("status") is None:
        if len([x for x in parsed_cmd if parsed_cmd[x] is not None
                ]) > 1 and not replied_to_comment(comment, ACCOUNT):
            if reply_message(comment, MESSAGES["STATUS_MISSING"], ACCOUNT):
                logger.info("Missing status parameter message sent to %s",
                            comment["url"])
            else:
                logger.info("Couldn't reply to %s", comment["url"])
        QUEUE_COMMENTS.task_done()
        return
    root_comment = queue_item[1]
    category = get_category(root_comment, TASKS_PROPERTIES)
    if category is None:
        logger.info("No valid category found. %s", root_comment["url"])
        QUEUE_COMMENTS.task_done()
        return

    if ACCOUNT:
        reply = replied_to_comment(root_comment, ACCOUNT)
        send_summary_to_steem(parsed_cmd, reply, root_comment)

    if DISCORD_WEBHOOK_TASKS:
        content = (
            f'[{parsed_cmd["status"].upper()}] <{build_comment_link(root_comment)}>'
        )
        embeds = [build_discord_tr_embed(root_comment, parsed_cmd)]
        send_message_to_discord(DISCORD_WEBHOOK_TASKS, content, embeds)
    QUEUE_COMMENTS.task_done()
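process_cmd_comments drains at most one item from QUEUE_COMMENTS per call and returns immediately when the queue is empty, so it is meant to be called repeatedly. A hedged scheduling sketch (the producer that fills the queue is assumed to run elsewhere):

import time

# Poll the command queue forever; sleep briefly to avoid busy-waiting on an empty queue.
while True:
    process_cmd_comments()
    time.sleep(1)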
Example #7
    def to_python(self, data_row):
        studygroup = dict(
            hours=data_row[1],
            start=data_row[2],
            end=data_row[3],
        )
        studygroup_query = studygroup.copy()
        studygroup_query.update(dict(
            subject__short_name=data_row[0],
            department__name=data_row[4],
        ))

        subject = dict(
            short_name=data_row[0],
        )

        department = dict(
            name=data_row[4],
        )

        organization_name = data_row[6]
        match = re.findall(r'\d+', organization_name)
        organization_cast = get_organization_type(organization_name)
        organization = dict(
            name=organization_name,
            number=match[0] if match else None,
            cast=organization_cast,
        )

        last_name, first_name, patronymic = data_row[5]

        listener_position=get_position_fuzzy(data_row[7])
        listener = dict(
            first_name_inflated=first_name,
            last_name_inflated=last_name,
            patronymic_inflated=patronymic,
            position=listener_position,
            profile=get_profile_fuzzy(data_row[7]),
            category=get_category(organization_cast, listener_position),
        )

        attestation_work_name = data_row[8]
        cert_number = data_row[9]
        return {
            'studygroup': studygroup,
            'studygroup_query': studygroup_query,
            'department': department,
            'subject': subject,
            'listener': listener,
            'organization': organization,
            'attestation_work_name': attestation_work_name,
            'cert_number': cert_number,
        }
Example #8
    def split_train_data(self, paths, ratio=None):
        '''
        Split the data paths into train and validation sets.
        '''

        if ratio is None:
            ratio = 0.05 / (1 - 1.0 / len(self.fold_itens))

        # Get data files.
        brains_data = Files("")
        brains_data.paths = paths
        brains_data = brains_data.get_file_names()
        total_brains = len(brains_data)
        validation_size = int(total_brains * ratio)

        # Get total of brains by category.
        brains_by_patient = group_brains_by_patient_id(brains_data)
        validation_paths = []
        train_paths = []
        brains_by_category = group_brains_by_category(brains_data)
        statistic = {}

        # Initialize statistic data.
        for label in brains_by_category:
            statistic[label] = np.round(
                len(brains_by_category[label]) * validation_size * 1.0 /
                total_brains)

        # Create train and validation set.
        for label in brains_by_category:
            index_brains = 0

            while statistic[label] > 0:
                patient_id = get_patient_id(
                    brains_by_category[label][index_brains])
                brains_patient = brains_by_patient[patient_id]

                for brain_patient in brains_patient:
                    validation_paths.append(brain_patient)
                    statistic[get_category(brain_patient)] -= 1

                index_brains += 1

        for brain in brains_data:
            if brain not in validation_paths:
                train_paths.append(brain)

        # Shuffle data in place (shuffling a temporary np.array copy would leave the lists unchanged).
        np.random.shuffle(validation_paths)
        np.random.shuffle(train_paths)

        return train_paths, validation_paths
Example #9
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    _model = utils.parse_attr(config.get('model', 'dnn'))
    dnn = _model(model.ConfigChannels(config, state_dict), anchors,
                 len(category))
    logging.info(
        humanize.naturalsize(
            sum(var.cpu().numpy().nbytes
                for var in dnn.state_dict().values())))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(
        torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    d = utils.dense(state_dict[args.name])
    keep = torch.LongTensor(np.argsort(d)[:int(len(d) * args.keep)])
    modifier = utils.channel.Modifier(
        args.name,
        state_dict,
        dnn,
        lambda name, var: var[keep],
        lambda name, var, mapper: var[mapper(keep, len(d))],
        debug=args.debug,
    )
    modifier(output.grad_fn)
    if args.debug:
        path = modifier.dot.view(
            '%s.%s.gv' % (os.path.basename(model_dir),
                          os.path.basename(os.path.splitext(__file__)[0])),
            os.path.dirname(model_dir))
        logging.info(path)
    assert len(keep) == len(state_dict[args.name])
    dnn = _model(model.ConfigChannels(config, state_dict), anchors,
                 len(category))
    dnn.load_state_dict(state_dict)
    dnn(image)
    if not args.debug:
        torch.save(state_dict, path)
Example #10
def main():
    while True:
        try:
            queue_item = QUEUE_COMMENTS.get_nowait()
        except queue.Empty:
            continue

        comment: Comment = queue_item[0]
        cmd_str = comment["body"]
        LOGGER.debug(cmd_str)
        parsed_cmd = parse_command(cmd_str)
        if parsed_cmd is None:
            LOGGER.info("No command found")
            QUEUE_COMMENTS.task_done()
            continue
        elif parsed_cmd["help"] is not None and comment["author"] != ACCOUNT:
            replied = False
            for reply in comment.get_replies():
                if reply["author"] == ACCOUNT:
                    LOGGER.info("Already replied with help command. %s", comment["url"])
                    replied = True
                    break
            if not replied:
                send_help_message(comment, ACCOUNT)
            QUEUE_COMMENTS.task_done()
            continue
        if parsed_cmd.get("status") is None:
            if len([x for x in parsed_cmd if parsed_cmd[x] is not None]) > 1:
                send_missing_status_message(comment, ACCOUNT)
            QUEUE_COMMENTS.task_done()
            continue
        root_comment = queue_item[1]
        category = get_category(root_comment, TASKS_PROPERTIES)
        if category is None:
            LOGGER.info("No valid category found. %s", root_comment["url"])
            QUEUE_COMMENTS.task_done()
            continue
        category = TASKS_PROPERTIES[category]["category"]
        webhook = DiscordWebhook(
            url=DISCORD_WEBHOOK_TASKS,
            content=f'[{category.upper()}][{parsed_cmd["status"].upper()}] <{build_comment_link(root_comment)}>',
        )
        webhook.add_embed(build_discord_tr_embed(root_comment, parsed_cmd))
        webhook.execute()
        QUEUE_COMMENTS.task_done()
Example #11
def follow_euro_2016(user_data, callback, data, delta=60):
    # logging.info('following Euro2016')
    # goal at 22:17
    current_time = dateutil.parser.parse('06/10/2016 22:14:00 +0200')

    current_counter = Counter()
    last_goal = None
    min_time = datetime.timedelta(minutes=4)
    previous_score = 0, 0

    while True:
        tweets = get_tweets_around(current_time, data, delta)

        categories = [get_category(tweet['text']) for tweet in tweets]
        counter = Counter(categories)

        # logging.info('%s %s', current_time, counter)
        if (counter['BUT'] > current_counter['BUT'] + 50 and
            (last_goal is None or current_time >= last_goal + min_time)):
            # for tweet in random.sample(tweets, 10):
            #    logging.debug(tweet['text'])
            scores = parse_tweets(tweets)
            scores = Counter(scores)
            if scores:
                country1, score1, country2, score2 = max(scores,
                                                         key=scores.get)
                score1 = int(score1)
                score2 = int(score2)

                prev_score1, prev_score2 = previous_score
                if score1 - prev_score1 + score2 - prev_score2 == 1:
                    scorer = country1 if score1 > prev_score1 else country2

                    previous_score = score1, score2
                    yield country1, score1, country2, score2, scorer
                    last_goal = current_time

        time.sleep(delta)
        current_time += datetime.timedelta(seconds=delta)
        current_counter = counter
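follow_euro_2016 is a generator that yields one tuple per detected goal. A minimal consumption sketch; tweets_data is a hypothetical placeholder for the loaded tweet dump, and user_data/callback are passed as None since this snippet never uses them:

# Hedged usage sketch; tweets_data is a placeholder.
for country1, score1, country2, score2, scorer in follow_euro_2016(None, None, tweets_data):
    print('%s %d - %d %s (scored by %s)' % (country1, score1, score2, country2, scorer))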
Example #12
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    try:
        path, step, epoch = utils.train.load_model(model_dir)
        state_dict = torch.load(path,
                                map_location=lambda storage, loc: storage)
    except (FileNotFoundError, ValueError):
        logging.warning('model cannot be loaded')
        state_dict = None
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(
        config, state_dict), anchors, len(category))
    logging.info(
        humanize.naturalsize(
            sum(var.cpu().numpy().nbytes
                for var in dnn.state_dict().values())))
    if state_dict is not None:
        dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(
        torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    graph = utils.visualize.Graph(config, state_dict)
    graph(output.grad_fn)
    diff = [key for key in state_dict if key not in graph.drawn]
    if diff:
        logging.warning('variables not shown: ' + str(diff))
    path = graph.dot.view(
        os.path.basename(model_dir) + '.gv', os.path.dirname(model_dir))
    logging.info(path)
Example #13
    def __getitem__(self, idx):
        i = idx * batch_size

        length = min(batch_size, (len(self.ids) - i))
        X = np.empty((length, img_rows, img_cols, 3), dtype=np.float32)
        Y = np.empty((length, img_rows, img_cols, num_classes),
                     dtype=np.float32)

        for i_batch in range(length):
            id = self.ids[i + i_batch]
            name = self.names[id]
            image = get_image(name)
            category = get_category(id)
            image, category = random_crop(image, category)

            image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

            X[i_batch] = image
            Y[i_batch] = to_categorical(category, num_classes)

        X = preprocess_input(X)

        return X, Y
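This __getitem__ belongs to a keras.utils.Sequence that builds segmentation batches. The matching __len__ is not shown; a hedged sketch, assuming the same module-level batch_size constant:

    # Sketch of the companion __len__; assumes the same batch_size used above.
    def __len__(self):
        return int(np.ceil(len(self.ids) / float(batch_size)))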
Example #14
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(
        config, state_dict), anchors, len(category))
    logging.info(
        humanize.naturalsize(
            sum(var.cpu().numpy().nbytes
                for var in dnn.state_dict().values())))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(
        torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    closure = utils.walk.Closure(args.name, state_dict,
                                 type(dnn).scope, args.debug)
    closure(output.grad_fn)
    d = utils.dense(state_dict[args.name])
    channels = torch.LongTensor(np.argsort(d)[int(len(d) * args.remove):])
    utils.walk.prune(closure, channels)
    if args.debug:
        path = closure.dot.view(
            os.path.basename(model_dir) + '.gv', os.path.dirname(model_dir))
        logging.info(path)
    else:
        torch.save(state_dict, path)
Example #15
    def startSpiderWap(self):
        if self.spider_queue.empty():
            fetched_users = self.db.execute(
                'SELECT * from spider_list ORDER BY weight DESC limit 0,30')
            if fetched_users <= 0:
                print('nothing to spider,spider_list is empty')
                return False
            self.start = 'start'
            self.errno = ERR_NO
            fetchall = self.db.fetchall()
            # Add the pending sharers fetched from the database to the crawl queue
            for item in fetchall:
                self.spider_queue.put({
                    'sid': item[0],
                    'uk': item[1],
                    'file_fetched': item[2],
                    'follow_fetched': item[3],
                    'follow_done': item[4],
                    'file_done': item[5],
                    'weight': item[6],
                    'uid': item[7]
                })
            self.got_follow_count = 0
            self.got_files_count = 0
            self.while_count = 0

        while not self.spider_queue.empty():
            self.while_count += 1
            share_user = self.spider_queue.get()
            # Crawl this sharer's file list
            if not share_user['file_done']:
                print('%d now spidering file ,%d  file fetched' % (
                    share_user['uk'], share_user['file_fetched']))
                rs = self.getShareListsWap(share_user['uk'],
                                           share_user['file_fetched'])
                if not rs:
                    print('uk:%d error to fetch files,try again later...' %
                          share_user['uk'])
                    return True
                total_count, fetched_count, file_list = rs
                total_fetched = share_user['file_fetched'] + fetched_count
                print('fetched_file_count:%d' % fetched_count)
                if total_fetched >= total_count or total_count == 0:
                    share_user['file_done'] = 1  # all of this sharer's files have been crawled
                if total_count == 0:
                    self.db.execute(
                        "UPDATE spider_list set file_done=%s WHERE sid=%s",
                        (1, share_user['sid']))
                    self.db.commit()
                else:
                    try:
                        files_count = 0
                        for file in file_list:
                            files_count += 1
                            ext = ''
                            file_type = ''
                            file_type_i = -1
                            if file['isdir'] == 0 and file[
                                    'feed_type'] == 'share':
                                ext = utils.get_extension(
                                    file['title']).lower()
                                file_type = utils.get_category(ext)
                                file_type_i = self.file_type_t[file_type]
                            time_stamp = int(time.time())
                            self.db.execute(
                                "INSERT INTO share_file (title,uk,shareid,shorturl,isdir,size,md5,ext,feed_time,create_time,file_type,uid,feed_type) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                (file['title'], file['uk'], file['shareid'],
                                 file['shorturl'], file['isdir'], file['size'],
                                 file['md5'], ext, file['feed_time'],
                                 time_stamp, file_type_i, share_user['uid'],
                                 file['feed_type']))
                    except:
                        share_user['file_done'] = 0
                        self.db.rollback()
                        traceback.print_exc()
                        return False
                    else:
                        self.db.execute(
                            "UPDATE spider_list set file_fetched=%s,file_done=%s WHERE sid=%s",
                            (total_fetched, share_user['file_done'],
                             share_user['sid']))
                        self.db.execute(
                            "UPDATE share_users set fetched=%s WHERE uid=%s",
                            (total_fetched, share_user['uid']))
                        share_user['file_fetched'] = total_fetched
                        self.got_files_count += files_count
                        self.db.commit()

            # After the files are crawled, crawl the follow list (skipped in the WAP version for now)
            if share_user['follow_done'] == 0 and share_user['file_done'] == 1:
                share_user['follow_done'] = 1
                print('Deleting user: %d' % share_user['sid'])
                self.db.execute("DELETE FROM spider_list WHERE sid=%s",
                                (share_user['sid'], ))
                self.db.commit()
            time.sleep(SPIDER_INTERVAL)

        print('-----------------Done------------------')
        print('while_count:%d' % self.while_count)
        print('got_follow_count:%d' % self.got_follow_count)
        print('got_files_count:%d' % self.got_files_count)
        return True
Example #16
def build_discord_tr_embed(comment: dict, cmds_args: dict) -> DiscordEmbed:
    """Creates a Discord embed for a Utopian task request.

    :param comment: Steem root post with task request
    :type comment: dict
    :param cmds_args: Parsed bot commands and arguments
    :type cmds_args: dict
    """
    category = get_category(comment, TASKS_PROPERTIES)
    color = 0
    type_ = None
    thumbnail = None
    if category is not None:
        color = int(TASKS_PROPERTIES[category]["color"][1:], 16)
        type_ = TASKS_PROPERTIES[category]["category"]
        thumbnail = TASKS_PROPERTIES[category]["image_url"]

    title = f'{comment["title"]}'
    description_parts = []
    if cmds_args.get("description") is not None:
        description_parts.append(cmds_args["description"].strip())
    # description_parts.append(
    #     f'*You can read [here]({build_comment_link(comment)}) the whole task by **{comment["author"]}**.*'
    # )

    description = "\n\n".join(description_parts)
    embed = DiscordEmbed(title=title, description=description)
    author = Account(comment["author"])
    embed.set_author(
        name=author.name,
        url=f"{UI_BASE_URL}/@{author.name}",
        icon_url=author.profile.get("profile_image"),
    )
    embed.set_color(color)
    embed.set_footer(text="Verified by Utopian.io team")
    embed.set_thumbnail(url=thumbnail)
    embed.set_timestamp()

    if type_ is not None:
        embed.add_embed_field(name="Task Type", value=type_.upper(), inline=True)

    status = None
    if cmds_args.get("status") is not None:
        status = cmds_args["status"]
        embed.add_embed_field(name="Status", value=status.upper(), inline=True)

    if status and status.upper() == "CLOSED":
        return embed

    if cmds_args.get("skills"):
        skills = normalize_str(cmds_args["skills"])
        embed.add_embed_field(name="Required skills", value=skills, inline=True)

    if cmds_args.get("discord") is not None:
        embed.add_embed_field(
            name="Discord", value=f'{cmds_args["discord"]}', inline=True
        )

    if cmds_args.get("bounty"):
        bounty = normalize_str(cmds_args["bounty"]).upper()
    else:
        bounty = "See the task details"
    embed.add_embed_field(name="Bounty", value=bounty, inline=True)

    if cmds_args.get("deadline"):
        deadline = cmds_args["deadline"]
    else:
        deadline = "Not specified"
    embed.add_embed_field(name="Due date", value=deadline, inline=True)

    is_in_progress = status and status.upper() == "IN PROGRESS"
    if is_in_progress and cmds_args.get("assignees"):
        assignees = normalize_str(cmds_args["assignees"]).lower()
        assignees_links = accounts_str_to_md_links(assignees)
        embed.add_embed_field(name="Assignees", value=assignees_links, inline=False)

    if cmds_args.get("note") is not None:
        embed.add_embed_field(name="Misc", value=f'{cmds_args["note"]}', inline=False)

    return embed
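A typical way to post the returned embed, mirroring the DiscordWebhook usage shown earlier in Example #10 (the content string here is a placeholder):

# Hedged usage sketch based on the discord_webhook calls in Example #10.
from discord_webhook import DiscordWebhook

embed = build_discord_tr_embed(root_comment, parsed_cmd)
webhook = DiscordWebhook(url=DISCORD_WEBHOOK_TASKS, content='New task request')
webhook.add_embed(embed)
webhook.execute()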
Example #17
 def extract_categories(self):
     for ex in self.transcripts:
         self.categories.add(utils.get_category(ex))
Example #18
 def add_category_features(cls, ex, categories, features):
     category = utils.get_category(ex)
     one_hot = [1 if k == category else 0 for k in categories]
     features.extend(one_hot)
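Example #18 appends a one-hot indicator of the example's category to an existing feature vector. A small illustration with made-up category names:

# Illustrative only; the category names are hypothetical.
categories = ['sports', 'politics', 'tech']
features = [0.3, 1.0]
category = 'politics'
one_hot = [1 if k == category else 0 for k in categories]
features.extend(one_hot)
# features is now [0.3, 1.0, 0, 1, 0]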
Example #19

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    doc2vec_model = Doc2Vec.load(model_filename)

    with open(data_filename, 'rb') as f:
        documents = cPickle.load(f)

    train_test_ratio = 0.8

    messages = [' '.join(doc.words) for doc in documents]

    categories = {}
    for message in messages:
        category = get_category(message)
        categories.setdefault(category, []).append(message)

    # for category, keywords_ in keywords.iteritems():
    #    pass

    import IPython
    IPython.embed()

    buts = [doc for doc in documents if doc.event == 'BUT']
    riens = [doc for doc in documents if doc.event == 'rien'][:len(buts)]
    documents = buts + riens
    random.shuffle(documents)

    train_size = int(len(documents) * train_test_ratio)
    train_set = documents[:train_size]
Example #20
    logging.basicConfig(level=logging.INFO)
    doc2vec_model = Doc2Vec.load(model_filename)

    with open(data_filename, 'rb') as f:
        documents = cPickle.load(f)

    train_test_ratio = 0.8

    messages = [
        ' '.join(doc.words)
        for doc in documents
    ]

    categories = {}
    for message in messages:
        category = get_category(message)
        categories.setdefault(category, []).append(message)

    # for category, keywords_ in keywords.iteritems():
    #    pass

    import IPython; IPython.embed()


    buts = [doc for doc in documents if doc.event == 'BUT']
    riens = [doc for doc in documents if doc.event == 'rien'][:len(buts)]
    documents = buts + riens
    random.shuffle(documents)

    train_size = int(len(documents) * train_test_ratio)
    train_set = documents[:train_size]
Example #21
    for tweet in ts.search_tweets_iterable(tso):
        try:
            if tweet['user'].get('location') is not None:
                place = tweet['user'].get('location')
                if place:
                    tweet_id = str(tweet['id'])
                    geocode_result = gmaps.geocode(place)
                    lat = geocode_result[0]['geometry']['location']['lat']
                    lng = geocode_result[0]['geometry']['location']['lng']
                    tweet_text = tweet['text'].lower().encode(
                        'ascii', 'ignore').decode('ascii')
                    raw_tweet = {
                        'user': tweet['user']['screen_name'],
                        'text': tweet_text,
                        'place': place,
                        'coordinates': {
                            'location': str(lat) + "," + str(lng)
                        },
                        'time': tweet['created_at'],
                        'category': get_category(tweet_text)
                    }
                    es.index(index=ES_INDEX,
                             doc_type=ES_TYPE,
                             id=tweet_id,
                             body=raw_tweet)
        except Exception as e:
            print(e)
            continue

except TwitterSearchException as e:
    print(e)
Example #22
    iaa.Multiply((0.8, 1.2), per_channel=0.2),

], random_order=True)

if __name__ == '__main__':
    with open('names.txt', 'r') as f:
        names = f.read().splitlines()

    filename = 'valid_ids.txt'
    with open(filename, 'r') as f:
        ids = f.read().splitlines()
        ids = list(map(int, ids))
    id = random.choice(ids)
    name = names[id]
    image = get_image(name)
    category = get_category(id)

    # Pass interpolation as a keyword argument; the third positional parameter of cv2.resize is dst.
    image = cv.resize(image, (img_rows, img_cols), interpolation=cv.INTER_NEAREST)
    category = cv.resize(category, (img_rows, img_cols), interpolation=cv.INTER_NEAREST)

    length = 10
    images = np.zeros((length, img_rows, img_cols, 3), np.uint8)
    categories = np.zeros((length, img_rows, img_cols), np.uint8)
    for i in tqdm(range(length)):
        images[i] = image.copy()
        categories[i] = category.copy()

    images_aug = seq_img.augment_images(images)
    images_aug = seq_det.augment_images(images_aug)
    categories_aug = seq_det.augment_images(categories)
Example #23
def build_discord_tr_embed(comment: dict, cmds_args: dict) -> DiscordEmbed:
    """Creates a Discord embed for a Utopian task request.

    :param comment: Steem root post with task request
    :type comment: dict
    :param cmds_args: Parsed bot commands and arguments
    :type cmds_args: dict
    """
    category = get_category(comment, TASKS_PROPERTIES)
    color = 0
    type_ = None
    thumbnail = None
    if category is not None:
        color = int(TASKS_PROPERTIES[category]["color"][1:], 16)
        type_ = TASKS_PROPERTIES[category]["category"]
        thumbnail = TASKS_PROPERTIES[category]["image_url"]

    title = f'{comment["title"]}'
    description = None
    if cmds_args.get("description"):
        description = cmds_args["description"]
    embed = DiscordEmbed(title=title, description=description)
    author = comment["author"]
    embed.set_author(
        name=author,
        url=f"{UI_BASE_URL}/@{author}",
        icon_url=f"https://steemitimages.com/u/{author}/avatar",
    )
    embed.set_color(color)
    embed.set_footer(text="Verified by Utopian.io team")
    embed.set_thumbnail(url=thumbnail)
    embed.set_timestamp()

    if type_ is not None:
        embed.add_embed_field(name="Task Type",
                              value=type_.upper(),
                              inline=True)

    status = None
    if cmds_args.get("status") is not None:
        status = cmds_args["status"]
        embed.add_embed_field(name="Status", value=status.upper(), inline=True)

    if status and status.upper() == "CLOSED":
        return embed

    if cmds_args.get("skills"):
        skills = ", ".join(cmds_args["skills"])
        embed.add_embed_field(name="Required skills",
                              value=skills,
                              inline=True)

    if cmds_args.get("discord"):
        embed.add_embed_field(name="Discord",
                              value=f'{cmds_args["discord"]}',
                              inline=True)

    if cmds_args.get("bounty"):
        bounty = ", ".join(cmds_args["bounty"])
    else:
        bounty = "See the task details"
    embed.add_embed_field(name="Bounty", value=bounty, inline=True)

    deadline = cmds_args.get("deadline")
    if not deadline:
        deadline = "Not specified"
    embed.add_embed_field(name="Due date", value=deadline, inline=True)

    is_in_progress = status and status.upper() == "IN PROGRESS"
    if is_in_progress and cmds_args.get("assignees"):
        assignees = ", ".join([f"@{a}" for a in cmds_args["assignees"]])
        assignees_links = accounts_str_to_md_links(assignees)
        embed.add_embed_field(name="Assignees",
                              value=assignees_links,
                              inline=False)

    if cmds_args.get("note") is not None:
        embed.add_embed_field(name="Misc",
                              value=f'{cmds_args["note"]}',
                              inline=False)

    return embed
Example #24
    def startSpider(self):
        if self.spider_queue.empty():
            fetched_users = self.db.execute('SELECT * from spider_list ORDER BY weight DESC limit 0,20')
            if fetched_users <= 0:
                print('nothing to spider,spider_list is empty')
                return False
            self.start = 'start'
            self.errno = ERR_NO
            fetchall = self.db.fetchall()
            # Add the pending sharers fetched from the database to the crawl queue
            for item in fetchall:
                self.spider_queue.put({
                    'sid': item[0],
                    'uk': item[1],
                    'file_fetched': item[2],
                    'follow_fetched': item[3],
                    'follow_done': item[4],
                    'file_done': item[5],
                    'weight': item[6],
                    'uid': item[7]
                })
            self.got_follow_count = 0
            self.got_files_count = 0
            self.while_count = 0

        while not self.spider_queue.empty():
            self.while_count += 1
            share_user = self.spider_queue.get()
            # Crawl this sharer's file list
            if not share_user['file_done']:
                print('%d now spidering file ,%d  file fetched' % (share_user['uk'], share_user['file_fetched']))
                rs = self.getShareLists(share_user['uk'], share_user['file_fetched'])
                if not rs:
                    print('uk:%d error to fetch files,try again later...' % share_user['uk'])
                    return True
                total_count, fetched_count, file_list = rs
                total_fetched = share_user['file_fetched'] + fetched_count
                print('fetched_file_count:%d' % fetched_count)
                if total_fetched >= total_count or total_count == 0:
                    share_user['file_done'] = 1  # all of this sharer's files have been crawled
                if total_count == 0:
                    self.db.execute("UPDATE spider_list set file_done=%s WHERE sid=%s", (1, share_user['sid']))
                    self.db.commit()
                else:
                    try:
                        files_count = 0
                        for file in file_list:
                            files_count += 1
                            ext = ''
                            file_type = ''
                            file_type_i = -1
                            if file['isdir'] == 0 and file['feed_type'] == 'share':
                                ext = utils.get_extension(file['title']).lower()
                                file_type = utils.get_category(ext)
                                file_type_i = self.file_type_t[file_type]
                            time_stamp = int(time.time())
                            self.db.execute(
                                    "INSERT INTO share_file (title,uk,shareid,shorturl,isdir,size,md5,ext,feed_time,create_time,file_type,uid,feed_type) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                    (file['title'], file['uk'], file['shareid'],
                                     file['shorturl'], file['isdir'], file['size'], file['md5'], ext, file['feed_time'],
                                     time_stamp, file_type_i, share_user['uid'], file['feed_type'])
                            )
                    except:
                        share_user['file_done'] = 0
                        self.db.rollback()
                        traceback.print_exc()
                        return False
                    else:
                        self.db.execute("UPDATE spider_list set file_fetched=%s,file_done=%s WHERE sid=%s",
                                        (total_fetched, share_user['file_done'], share_user['sid']))
                        self.db.execute("UPDATE share_users set fetched=%s WHERE uid=%s",
                                        (total_fetched, share_user['uid']))
                        share_user['file_fetched'] = total_fetched
                        self.got_files_count += files_count
                        self.db.commit()

            # After the files are crawled, crawl the follow list
            if share_user['follow_done'] == 0 and share_user['file_done'] == 1:
                print('%d now spidering follow ,%d  follow fetched' % (share_user['uk'], share_user['follow_fetched']))
                rs = self.getFollows(share_user['uk'], share_user['follow_fetched'])
                if not rs:
                    print('error to fetch follows,try again later...')
                    return
                total_count, fetched_count, follow_list = rs
                total_fetched = share_user['follow_fetched'] + fetched_count
                print('fetched_follow_count:%d' % fetched_count)
                if total_fetched >= total_count or total_count == 0:
                    share_user['follow_done'] = 1
                if total_count == 0:
                    self.db.execute("DELETE FROM spider_list WHERE sid=%s", (share_user['sid'],))
                    self.db.commit()
                else:
                    try:
                        follow_count = 0
                        for follow in follow_list:
                            follow_count += 1
                            # Check whether this user is already in the share_users table
                            if self.db.execute('SELECT * FROM share_users WHERE uk=%s', (follow['follow_uk'],)) > 0:
                                print('uk:%d has already in share_user table' % follow['follow_uk'])
                                continue
                            time_stamp = int(time.time())
                            self.db.execute("INSERT INTO share_users (uk,user_name,avatar_url,intro,follow_count,album_count,\
                                fens_count,pubshare_count,last_visited,create_time,weight) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                                            (
                                                follow['follow_uk'], follow['follow_uname'], follow['avatar_url'],
                                                follow['intro'], follow['follow_count'],
                                                follow['album_count'], follow['fans_count'], follow['pubshare_count'],
                                                time_stamp, time_stamp, 5
                                            )
                                            )
                            # Add the newly discovered sharer to the crawl list
                            self.db.execute("INSERT INTO spider_list (uk,uid) VALUES(%s,%s)",
                                            (follow['follow_uk'], self.db.last_row_id()))
                    except:
                        share_user['follow_done'] = 0
                        self.db.rollback()
                        traceback.print_exc()
                        return False
                    else:
                        if share_user['follow_done'] == 1:
                            # Follows fully crawled: this sharer is done, remove it from the pending list
                            print('delete follow fetched sid:%d from spider_list' % share_user['sid'])
                            self.db.execute("DELETE FROM spider_list WHERE sid=%s", (share_user['sid'],))
                        else:
                            self.db.execute("UPDATE spider_list set follow_fetched=%s,follow_done=%s WHERE sid=%s",
                                            (total_fetched, share_user['follow_done'], share_user['sid']))
                        share_user['follow_fetched'] = total_fetched
                        self.got_follow_count += follow_count
                        self.db.commit()
            # If the follow list is not finished, this sharer is not done yet: re-queue it and keep crawling
            if share_user['follow_done'] == 0:
                self.spider_queue.put(share_user)
            else:
                print('%d has done' % share_user['uk'])
                del share_user
            time.sleep(SPIDER_INTERVAL)

        print('-----------------Done------------------')
        print('while_count:%d' % self.while_count)
        print('got_follow_count:%d' % self.got_follow_count)
        print('got_files_count:%d' % self.got_files_count)
        return True
Example #25
confirm_button = st.checkbox('GO!')
if confirm_button:
    show_covid_feature_relationship(group_dict, sub_feature_list)

# 1.1.2 show how one is affected by multiple
show_covid_feature_multi_relationship(total_covid_feature, yelp_covid_bool_df)

# city
# geometric interactive, state/city

st.write("## 3. How do businesses' categories affect their reaction?")

st.markdown("Let's now explore how businesses of different categories behave. \
    We start by looking at whether different categories react differently to the above COVID features."
            )
business_category_info = get_category(yelp_join)
show_business_in_category(yelp_covid_bool_df, business_category_info)

st.markdown("### 3.1 How long do they plan to close")
close_for_how_long(yelp_join)

st.markdown("### 3.2 What does the Covid Banner say")
what_covid_banner_say(yelp_join, business_category_info)

st.markdown("### 3.3 What's in the highlights")
business_highlight_info = get_highlight_info(yelp_join)
what_are_highlights(business_highlight_info)

st.markdown("## 2. How does businesses' location affect their reaction?")

total_targets = yelp_covid_bool_df.columns[1:]