Example 1
    def log_new_connnection(self, client_ip, user_agent, room_id,
                            config_filename, config):

        connection_info = {}

        connection_info['room_id'] = room_id
        connection_info['config_filename'] = config_filename
        connection_info['config'] = config

        connection_info['client_ip'] = client_ip
        connection_info['client_ip_info'] = get_ip_information(client_ip)

        if user_agent:
            connection_info['user_agent'] = user_agent.string
            connection_info['user_platform'] = user_agent.platform
            connection_info['user_browser'] = user_agent.browser

        timestamp = time.time()
        timestamp_human_readable = datetime.datetime.fromtimestamp(
            timestamp).strftime('%H:%M:%S on %d/%m/%Y')
        connection_info['timestamp'] = timestamp
        connection_info['timestamp_human_readable'] = timestamp_human_readable

        connection_info_filename = os.path.join(self.log_folder,
                                                'connection_info.json')
        tools.save_json(connection_info_filename, connection_info)
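The tools.save_json helper these snippets rely on is not shown here. A minimal sketch of such a helper, written for the (filename, data) argument order used in Example 1 and assuming nothing beyond the standard library (an illustration, not the actual tools module), could look like this:

import json
import os


def save_json(filename, data):
    # Hypothetical helper: write data as JSON to filename, creating the
    # parent directory first if it does not exist yet.
    dirname = os.path.dirname(filename)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)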
Example 2
    def save_drawing_to_file(self, drawing_data):
        drawing_folder = os.path.join(self.log_folder, DRAWING_FOLDERNAME)
        tools.ensure_dir(drawing_folder)

        drawing_files = tools.list_files(drawing_folder, ['*.json'])
        drawing_filename = os.path.join(
            drawing_folder, '{:04}.json'.format(len(drawing_files)))
        tools.save_json(drawing_filename, drawing_data)

        return drawing_filename
Example 3
def listener(event):
    if event.exception:
        global data
        global project_dir
        logger.info("【{}任务退出】{}".format(event.job_id, event.exception.message))
        logger.info("【data】{}".format(data))
        save_json(project_dir, "9 data", data)

    else:
        logger.info("【爬取任务正常运行】")
Example 4
def launch(item, command, no_cache=False):
    if not no_cache:
        # save the command and increment its click counter in the cache file; we don't cache items from the user-defined menu
        if command not in cache:
            cache[command] = 1
        else:
            cache[command] += 1
        save_json(cache, cache_file)
    # run the command and quit
    subprocess.Popen('exec {}'.format(command), shell=True)
    Gtk.main_quit()
Example 5
    def save_learner_logs_to_file(self, learner_logs):
        learner_logs_folder = os.path.join(self.log_folder,
                                           LEARNER_LOGS_FOLDERNAME)
        tools.ensure_dir(learner_logs_folder)

        files = tools.list_files(learner_logs_folder, ['*.json'])
        learner_logs_filename = os.path.join(learner_logs_folder,
                                             '{:04}.json'.format(len(files)))

        tools.save_json(learner_logs_filename, learner_logs)

        return learner_logs_filename
Example 6
def main_pipeline():
    '''
    Base logic here
    '''
    new_files = tools.check_input_dir()
    for file_name in new_files:
        p = Parser(dirs.INPUT_DIR + file_name)
        try:
            json_data = p.make_json()
            tools.save_json(json_data, file_name)
            save_to_db(json_data)
            tools.move_input_file(file_name, dirs.OK_DIR)
        except tools.MyLocalException:
            tools.move_input_file(file_name, dirs.ERR_DIR)
Example 7
def loop():
    # check whether the end time has been reached
    global end_timestrap
    global request_points
    current_timestamp = get_timestamp()
    if end_timestrap is not None:  # an end time was set
        if current_timestamp > end_timestrap:
            sched.shutdown()

    current_time = timestamp_to_time(current_timestamp)
    logger.info("【开始爬取】{}".format(current_time))

    # crawling work
    global data
    for frame, value in request_points.items():
        if value["extent"] is None:  #该图幅没有img
            continue
        req_pnt = value["req_pnt"]
        imgs = do_get(*req_pnt)
        if imgs is None:
            logger.error("图幅{}出错:在该位置上没有获得imgs".format(frame))
            continue
        # process the imgs
        for img in imgs:
            timestamp = img[1]  # timestamp
            time = timestamp_to_time(timestamp)  # formatted time
            # add to data
            if frame not in data:
                data[frame] = OrderedDict()
            if time not in data[frame]:
                data[frame][time] = {
                    "timestamp": timestamp,
                    "time": time,
                    "url": img[0],
                    "extent": img[2],
                    "req_pnt": req_pnt,
                    "file_path": ""
                }
            else:
                pass
    print data

    save_json(project_dir, "9 data", data)

    download()  # download
    registration()  # registration (align the images)
    return
Example 8
def registration():
    global data
    global data_rgs
    global params
    global logger

    rgs_dir = params["registration_dir"]

    for frame, frame_value in data.items():
        for time, time_value in frame_value.items():
            # original data
            origin_file_path = time_value["file_path"]  # path of the original data file
            if origin_file_path == "":
                # not downloaded yet, skip
                continue
            origin_file_name = time_value["file_name"]  # original data file name

            # registered data
            if frame not in data_rgs:
                data_rgs[frame] = OrderedDict()

            if time in data_rgs[frame]:
                # this item has already been registered
                continue

            # this item has not been registered yet
            rgs_file_name = "{} {}.png".format(frame, time)
            rgs_file_path = os.path.join(rgs_dir, rgs_file_name)  # path of the registered file
            if os.path.exists(rgs_file_path):  # the registered file already exists
                continue

            rgs_data_item = do_rgs(time_value, rgs_file_path)
            if rgs_data_item is None:
                logger.error("[配准失败!] {}".format(origin_file_path))
            else:
                # registration done, save into data_rgs
                data_rgs[frame][time] = rgs_data_item

    save_json(project_dir, "9 data_rgs", data_rgs)  #保存配准信息
    pass
Example 9
    def epoch_step(self, logs={}):
        for (k, v) in logs.items():
            l = self.H.get(k, [])
            # np.float32 values are not JSON-serializable, so convert them
            if not isinstance(v, float):
                v = round(float(v), 4)
            l.append(v)
            self.H[k] = l

        # write to file
        if self.json_path is not None:
            save_json(data=self.H, file_path=self.json_path)

        # save training plots
        if len(self.H["loss"]) == 1:
            self.paths = {
                key: self.file_dir / (self.arch + f'_{key.upper()}')
                for key in self.H.keys()
            }

        if len(self.H["loss"]) > 1:
            # plot how each metric changes over epochs;
            # train/valid (and optionally test) keys must come in pairs
            keys = [key for key, _ in self.H.items() if '_' not in key]
            for key in keys:
                N = np.arange(0, len(self.H[key]))
                plt.style.use("ggplot")
                plt.figure()
                plt.plot(N, self.H[key], label=f"train_{key}")
                plt.plot(N, self.H[f"valid_{key}"], label=f"valid_{key}")
                if self.add_test:
                    plt.plot(N, self.H[f"test_{key}"], label=f"test_{key}")
                plt.legend()
                plt.xlabel("Epoch #")
                plt.ylabel(key)
                plt.title(f"Training {key} [Epoch {len(self.H[key])}]")
                plt.savefig(str(self.paths[key]))
                plt.close()
Example 10
def download():
    global data
    global project_dir
    global logger

    # "original" folder
    # original_dir = os.path.join(project_dir, "0original")
    # if os.path.exists(original_dir) is False:
    #     os.mkdir(original_dir)
    original_dir = params["original_dir"]

    for frame, frame_value in data.items():
        # folder for this map sheet
        frame_dir = os.path.join(original_dir, frame)
        if os.path.exists(frame_dir) is False:
            os.mkdir(frame_dir)

        for time, time_value in frame_value.items():
            # already downloaded --> skip
            if time_value["file_path"] != "":
                continue

            # download
            url = time_value["url"]
            fn, fp = download_file(url, frame_dir)
            if fn is None:  # download error
                logger.error("[下载失败!] {}".format(url))
                continue  # skip
            else:  # download finished
                time_value["file_path"] = fp
                time_value["file_name"] = fn
                # save the header file
                hdr_fn = os.path.splitext(fn)[0]  # header file name
                save_json(frame_dir, hdr_fn, time_value)
                # logger.info("[下载成功] {}".format(fp) )
    pass
Example 11
def query_user(
        user: str,
        config_file: str,
        start_date: str = arrow.get().format('YYYY-MM-DD'),
        end_date: str = arrow.get().shift(years=-10).format('YYYY-MM-DD'),
        time_sleep: float = 1.1):
    """Get Twitter information about user profiles
    
    Arguments:
        user {str} -- Twitter screen name
        config_file {str} -- Config yml file

    Keyword Arguments:
        time_sleep {float} -- Seconds to wait between requests (default: {1.1})
        start_date {str} -- Start date of the query period (default: {arrow.get().format('YYYY-MM-DD')})
        end_date {str} -- End date of the query period (default: {arrow.get().shift(years=-10).format('YYYY-MM-DD')})

    Raises:
        err -- Raised when time_sleep is less than 1.1 seconds
    """
    settings = Settings()._load_config(config_file)

    # Create a connection with Elastic
    if settings.elasticsearch_url is not None:
        es = Elasticsearch(settings.elasticsearch_url)
        logger.info(es.info())
    else:
        es = None

    # Check if time_sleep is more than 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum) ")
        raise err

    api = twitter.Api(consumer_key=settings.CONSUMER_KEY,
                      consumer_secret=settings.CONSUMER_SECRET,
                      access_token_key=settings.ACCESS_TOKEN_KEY,
                      access_token_secret=settings.ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')

    user_query = "to:%s OR from:%s OR on:%s" % (user, user, user)

    logger.info("Scrapping query on Twitter")

    df = scrape_twitter_by_date(user_query, start_date, end_date)

    if df is not None:
        lst_statuses_ids = df['STATUS_ID'].tolist()
        hydrataded_statuses = hydratate_status(api, lst_statuses_ids)
        ## Save all jsons to file and load into Elastic
        logger.info("Processing Statuses from Twitter API to save jsons")
        for c_status_data in tqdm(hydrataded_statuses):
            cur_dict = Cut(c_status_data.AsDict())
            cur_id_str = cur_dict['id_str']

            #To ES improved data
            cur_json = _prepare_json_status(cur_dict.data)
            # TO FS original data backup
            cur_json_backup = json.dumps(cur_dict.data, indent=4)

            save_json(cur_json_backup,
                      settings.status_json_backup + cur_id_str + ".json")

            if es is not None:
                logger.debug("Indexing: %s " % cur_id_str)
                es.index(index=settings.ELASTICSEARCH_STATUS_INDEX,
                         doc_type='status',
                         id=cur_id_str,
                         body=cur_json)
Example 12
parser.add_argument("--topic_tree_url",
                    help="url of Khan academy topic tree",
                    action="store",
                    required=True)
parser.add_argument("--topic_tree_backup",
                    help="path to backup of Khan academy topic tree",
                    action="store",
                    required=True)
parser.add_argument("--youtube_ids_backup",
                    help="path to backup of youtube ids",
                    action="store",
                    required=True)

args = parser.parse_args()

topic_tree_url = args.topic_tree_url
topic_tree_backup = args.topic_tree_backup
youtube_ids_backup = args.youtube_ids_backup

# download topic tree if not already downloaded
if os.path.isfile(topic_tree_backup):
    topic_tree = tools.load_json(topic_tree_backup)
else:
    topic_tree = tools.download_topic_tree(topic_tree_url)
    tools.save_json(topic_tree_backup, topic_tree)

youtube_ids = tools.get_youtube_ids(topic_tree)
tools.save_dump(youtube_ids_backup, youtube_ids)
tools.save_json(youtube_ids_backup + '.json', youtube_ids)
tools.save_yaml(youtube_ids_backup + '.yaml', youtube_ids)
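The example above also reads a previous backup back with tools.load_json. A minimal counterpart to the save helper sketched after Example 1, assuming it simply parses and returns the file contents (an illustration, not the real tools module):

import json


def load_json(filename):
    # Hypothetical counterpart to save_json: read a JSON file and return the parsed data.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)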
Example 13
def initparam():
    """ 初始化参数
    """
    params = OrderedDict()

    print "【从前端获得参数】 %s" % str(request.values)
    print "【正在处理参数】"

    # [start time]
    params["start_time"] = request.form.get("startTime", type=str)
    if is_timestr(params['start_time']) is False:
        params['start_time'] = get_time()
    print "\t[开始时间] %s" % params["start_time"]

    # [end time]
    params["end_time"] = request.form.get("endTime", type=str)
    if is_timestr(params['end_time']) is False:
        params['end_time'] = "未设置" # 不结束
    print "\t[结束时间] %s" %params["end_time"]

    # [time interval]
    params["interval"] = request.form.get("interval", type=int)
    params['step'] = params['interval'] / 5  # step size
    print "\t[时间间隔] %d" % params["interval"]
    print "\t[保存图片的步长] %d" % params["step"]

    # [points to process]
    points = {
        'center_point' : request.form.get("centerPoint", type=str),
        'north_west_point' : request.form.get("northWestPoint", type=str),
        'north_east_point' : request.form.get("northEastPoint", type=str),
        'south_east_point' : request.form.get("southEastPoint", type=str),
        'south_west_point' : request.form.get("southWestPoint", type=str)
    }
    print '\t[点] %s' % str(points)
    for point_name,value in points.items():
        params[point_name] = get_latlng(
            value
        )

    # [save folder]
    params["save_file_dir"] = request.form.get("saveFileDir", type=str)
    print "\t[保存文件夹] %s" % params["save_file_dir"]
    save_dir = params["save_file_dir"]
    if not os.path.exists(save_dir):
        # the selected folder does not exist
        print "\t【WARNING】 选择的文件夹不存在,跳转到初始页面"
        print "【params】" + str(params)
        return redirect_index("文件夹路径不存在,请重新输入!")
    # [output folder]
    out_dir = os.path.join(save_dir, params['start_time'])
    params['out_dir'] = out_dir
    print "\t[图像输出文件夹] %s" % params["out_dir"]
    if os.path.exists(out_dir):
        # the output folder already exists
        print "【params】" + str(params)
        return redirect_index("输出文件夹已经存在,%s<Br/>请重新输入文件夹!" % out_dir)
    os.makedirs(out_dir)

    # project remark
    params["remark"] = request.form.get("remark")
    print "\t[项目备注] %s" % params["remark"]

    img_len, request_points = init_crawler(params)  # compute the center point for each request
    print request_points
    if img_len==0:
        return "抱歉,任务失败!<br/>框选区域没有雷达降水图。"
    else:
        # initialize folders
        init_dir(params, request_points)

        # save the crawl parameters
        print "【params】" + str(params)
        save_json(out_dir, u"0 param - 爬取参数", params)  # save as json
        file_str = save_params_file(params)  # save a copy for the user
        html_str = file_str.replace('\n', '<br/>')  # HTML string for the response

        # start the task in an asynchronous process
        executor.submit(start, params, request_points)

        return '任务已在后台运行!<br/>每次需要爬取{}张图片!<br/>{}'.format(img_len, html_str)
Example 14
def download_api_timeline(user: str,
                          time_sleep: float = 1.1,
                          since: str = '0'):
    """Goes to twitter API an get timeline of a user_id and saves into a json file (in "json" dir) and if Elasticsearch is identified send it too
    
    Arguments:
        user {str} -- Twitter Screen Name
        elasticsearch_url {str} -- Base url of ElasticSearch
    
    Keyword Arguments:
        time_sleep {float} -- Time between requests (default: {1.1})
        since {str} -- Status ID to start twitter extraction (default: {'0'})        
    """

    global first_status_id

    settings = Settings()

    # Create a connection with Elastic
    if settings.ELASTICSEARCH_URL is not None:
        es = Elasticsearch(settings.ELASTICSEARCH_URL)
        logger.info(es.info())
    else:
        es = None

    # Check if time_sleep is more than 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum) ")
        raise err

    api = twitter.Api(consumer_key=settings.CONSUMER_KEY,
                      consumer_secret=settings.CONSUMER_SECRET,
                      access_token_key=settings.ACCESS_TOKEN_KEY,
                      access_token_secret=settings.ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')

    # Go to Twitter API and get statuses by id
    logger.info("Downloading TimeLine Statuses from Twitter API")

    all_statuses_data = []
    logger.info("Starting at STATUS_ID: %s" % since)

    since_id = int(since)
    if since_id == 0:
        try:
            s = Search(using=es, index=STATUSES_INDEX, doc_type='status')
            s = s.query('match', user__screen_name=user)
            s = s.sort("-id", "-_id")
            since_id = s.execute()[0]['id']
            logger.info("Starting 🐦 timeline for [%s] from: %d" %
                        (user, since_id))
        except:
            logger.warning(
                "Error retrieving last status from ES for [%s] using 0" % user)

    statuses = api.GetUserTimeline(screen_name=user,
                                   count=200,
                                   include_rts=True,
                                   exclude_replies=False,
                                   since_id=since_id)
    if len(statuses) == 0:
        print("%s" % since)
        logger.warning("There isn't new results for this Timeline")
        return since
    all_statuses_data += statuses
    last_status_id = statuses[-1].id
    first_status_id = statuses[0].id

    # If the first status equals the last one in the query, all tweets have been read
    while statuses and statuses[0].id != last_status_id:
        statuses = api.GetUserTimeline(screen_name=user,
                                       count=200,
                                       include_rts=True,
                                       exclude_replies=False,
                                       max_id=last_status_id,
                                       since_id=since_id)
        logger.info("Readed: %d twts | Total: %d" %
                    (len(statuses), len(all_statuses_data)))
        all_statuses_data += statuses
        last_status_id = statuses[-1].id
        sleep(time_sleep)

    all_statuses_data = set(all_statuses_data)

    # Save all jsons to file and load into Elastic
    logger.info("Processing TimeLine Statuses from 🐦 API to save jsons")
    for c_status_data in tqdm(all_statuses_data):
        cur_dict = Cut(c_status_data.AsDict())
        cur_id_str = cur_dict['id_str']

        # To ES improved data
        cur_json = _prepare_json_status(c_status_data)
        # TO FS original data backup
        cur_json_backup = json.dumps(cur_dict.data, indent=4)

        save_json(cur_json_backup, "./json/" + cur_id_str + ".json")

        if es is not None:
            es.index(
                index=settings.ELASTICSEARCH_STATUS_INDEX,
                #ignore=400,
                doc_type='status',
                id=cur_id_str,
                body=cur_json)
    # STDOut and STDErr
    print("%d" % first_status_id)
    return first_status_id
Example 15
args = parser.parse_args()

output_data_path = args.output_path
video_metadata_path = args.video_metadata_path


video_metadata = tools.load_json(video_metadata_path)

authors_distribution = Counter(chain.from_iterable(map(lambda x: x["author_names"],
                                                       chain.from_iterable(video_metadata.values())))).most_common()

duration_distribution = Counter(map(lambda x: x["duration"],
                                    chain.from_iterable(video_metadata.values()))).most_common()

youtube_id_distribution = Counter(map(lambda x: x["youtube_id"],
                                      chain.from_iterable(video_metadata.values()))).most_common()

keywords_distribution = Counter(chain.from_iterable(map(lambda x: x["keywords"].split(','),
                                                        chain.from_iterable(video_metadata.values())))).most_common()


tools.save_json(os.path.join(output_data_path, 'authors_distribution.json'), authors_distribution)
tools.save_yaml(os.path.join(output_data_path, 'authors_distribution.yaml'), authors_distribution)
tools.save_json(os.path.join(output_data_path, 'duration_distribution.json'), duration_distribution)
tools.save_yaml(os.path.join(output_data_path, 'duration_distribution.yaml'), duration_distribution)
tools.save_json(os.path.join(output_data_path, 'youtube_id_distribution.json'), youtube_id_distribution)
tools.save_yaml(os.path.join(output_data_path, 'youtube_id_distribution.yaml'), youtube_id_distribution)
tools.save_json(os.path.join(output_data_path, 'keywords_distribution.json'), keywords_distribution)
tools.save_yaml(os.path.join(output_data_path, 'keywords_distribution.yaml'), keywords_distribution)
Example 16
 def save(self, out_path):
     tools.save_json(self.as_dict(), out_path)
Example 17
def query_api_statuses(query: str,
                       elasticsearch_url: str,
                       elasticuser: str = None,
                       elasticpass: str = None,
                       elasticsearch_index: str = STATUSES_INDEX,
                       time_sleep: float = 1.1,
                       since: str = '0'):
    """Goes to twitter API an get status info and saves into a json file (in "json" dir) and if Elasticsearch is identified send it too
    
    Arguments:
        query {str} -- Proposed query to obtain statuses on Twitter
        elasticsearch_url {str} -- [description]
    
    Keyword Arguments:
        elasticuser {str} -- [description] (default: {None})
        elasticpass {str} -- [description] (default: {None})
        elasticsearch_index {str} -- [description] (default: {STATUSES_INDEX})
        since {str} -- Status ID to start twitter extraction (default: {'0'})        
    """
    # Create a connection with Elastic
    if elasticsearch_url is not None:
        es = Elasticsearch(elasticsearch_url)
        logger.info(es.info())
    else:
        es = None

    # Check if time_sleep is more than 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum) ")
        raise err

    api = twitter.Api(consumer_key=CONSUMER_KEY,
                      consumer_secret=CONSUMER_SECRET,
                      access_token_key=ACCESS_TOKEN_KEY,
                      access_token_secret=ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')

    since_id = int(since)

    logger.info("Scrapping query on Twitter")

    df = scrape_twitter_by_date(query,
                                start_date=arrow.now().format('YYYY-MM-DD'),
                                end_date=arrow.now().format('YYYY-MM-DD'))

    if df is not None:
        lst_statuses_ids = df['STATUS_ID'].tolist()
        hydrataded_statuses = hydratate_status(api, lst_statuses_ids)
        ## Save all jsons to file and load into Elastic
        logger.info("Processing Statuses from Twitter API to save jsons")
        for c_status_data in tqdm(hydrataded_statuses):
            cur_dict = Cut(c_status_data.AsDict())
            cur_id_str = cur_dict['id_str']

            # Fix twitter dates to a more 'standard' date format
            list_all_keys_w_dots = dotter(cur_dict.data, '', [])
            try:
                for created_at_keys in list_all_keys_w_dots:
                    if 'created_at' in created_at_keys:
                        cur_dt = arrow.get(cur_dict[created_at_keys],
                                           TWITTER_DATETIME_PATTERN)
                        cur_dict[created_at_keys] = cur_dt.format(
                            "YYYY-MM-DDTHH:MM:SS") + "Z"
            except:
                logger.error("Error parsing dates on %s" % cur_id_str)

            cur_json = json.dumps(cur_dict.data, indent=4)

            save_json(cur_json, "./json/" + cur_id_str + ".json")
            if es is not None:
                logger.debug("Indexing: %s " % cur_id_str)
                es.index(
                    index=elasticsearch_index,
                    #ignore=400,
                    doc_type='status',
                    id=cur_id_str,
                    body=cur_json)
Example 18
                 'translated_youtube_lang', 'youtube_id']

topic_tree_leafs = tools.get_leafs(topic_tree)

logging.info('Gathering video metadata')
video_metadata = defaultdict(list)
for leaf in topic_tree_leafs:
    video_metadata[leaf['youtube_id']].append({key: leaf[key] for key in metadata_mask})

video_metadata = dict(video_metadata)

logging.info('Creating simplified map of the topic tree')
topic_tree_map = tools.make_map(topic_tree)

logging.info('Writing common metadata files')
tools.save_json(os.path.join(output_data_path, 'video_metadata.json'), video_metadata)
tools.save_yaml(os.path.join(output_data_path, 'video_metadata.yaml'), video_metadata)
tools.save_json(os.path.join(output_data_path, 'topic_tree_map.json'), topic_tree_map)
tools.save_yaml(os.path.join(output_data_path, 'topic_tree_map.yaml'), topic_tree_map)

logging.info('Writing metadata files for individual videos')
for youtube_id in video_metadata.keys():
    try:
        prefixed_youtube_id = tools.prefix(youtube_id)

        output_path = os.path.join(output_data_path, prefixed_youtube_id, prefixed_youtube_id + '_metadata')

        # prepare output audio dir
        try:
            os.mkdir(os.path.join(output_data_path, prefixed_youtube_id))
        except FileExistsError:
Example 19
def main():
    # exit if already running, thanks to Slava V at https://stackoverflow.com/a/384493/4040598
    pid_file = os.path.join(tempfile.gettempdir(), 'sgtk-menu.pid')
    fp = open(pid_file, 'w')
    try:
        fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        sys.exit(0)

    global appendix_file
    parser = argparse.ArgumentParser(description="GTK menu for sway and i3")
    parser.add_argument("-b",
                        "--bottom",
                        action="store_true",
                        help="display menu at the bottom")
    favourites = parser.add_mutually_exclusive_group()
    favourites.add_argument("-f",
                            "--favourites",
                            action="store_true",
                            help="prepend 5 most used items")
    favourites.add_argument('-fn',
                            type=int,
                            help="prepend <FN> most used items")
    appenxid = parser.add_mutually_exclusive_group()
    appenxid.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="append custom menu from {}".format(appendix_file))
    appenxid.add_argument("-af",
                          type=str,
                          help="append custom menu from {}".format(
                              os.path.join(config_dir, '<AF>')))
    parser.add_argument("-l",
                        type=str,
                        help="force language (e.g. \"de\" for German)")
    parser.add_argument("-s",
                        type=int,
                        default=20,
                        help="menu icon size (min: 16, max: 48, default: 20)")
    parser.add_argument(
        "-w",
        type=int,
        help="menu width in px (integer, default: screen width / 8)")
    parser.add_argument("-d",
                        type=int,
                        default=100,
                        help="menu delay in milliseconds (default: 100)")
    parser.add_argument(
        "-o",
        type=float,
        default=0.3,
        help="overlay opacity (min: 0.0, max: 1.0, default: 0.3)")
    parser.add_argument("-t",
                        type=int,
                        default=30,
                        help="sway submenu lines limit (default: 30)")
    global args
    args = parser.parse_args()
    if args.s < 16:
        args.s = 16
    elif args.s > 48:
        args.s = 48

    # Create default appendix file if not found
    if not os.path.isfile(appendix_file):
        save_default_appendix(appendix_file)

    # Replace appendix file name with custom - if any
    if args.af:
        appendix_file = os.path.join(config_dirs()[0], args.af)

    # cache stores number of clicks on each item
    global cache
    cache = load_json(cache_file)

    if not cache:
        save_json(cache, cache_file)
    global sorted_cache
    sorted_cache = sorted(cache.items(), reverse=True, key=lambda x: x[1])

    global locale
    locale = get_locale_string(args.l)
    category_names_dictionary = localized_category_names(locale)

    # replace additional category names with main ones
    for name in category_names:
        main_category_name = additional_to_main(name)
        try:
            localized_names_dictionary[
                main_category_name] = category_names_dictionary[
                    main_category_name]
        except:
            pass

    screen = Gdk.Screen.get_default()
    provider = Gtk.CssProvider()
    style_context = Gtk.StyleContext()
    style_context.add_provider_for_screen(
        screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)

    # find all .desktop entries, create DesktopEntry class instances;
    # DesktopEntry adds itself to the proper List in the class constructor
    list_entries()

    # Overlay window
    global win
    win = MainWindow()
    w, h = display_dimensions()
    win.resize(w, h)
    win.menu = build_menu()

    global menu_items_list
    menu_items_list = win.menu.get_children()

    win.menu.propagate_key_event = False
    win.menu.connect("key-release-event", win.search_items)
    # Let's reserve some width for long entries found with the search box
    if args.w:
        win.menu.set_property("width_request", args.w)
    else:
        win.menu.set_property("width_request",
                              int(win.screen_dimensions[0] / 8))
    win.show_all()

    GLib.timeout_add(args.d, open_menu)
    Gtk.main()
Example 20
def main():
    # exit if already running, thanks to Slava V at https://stackoverflow.com/a/384493/4040598
    pid_file = os.path.join(tempfile.gettempdir(), 'sgtk-menu.pid')
    fp = open(pid_file, 'w')
    try:
        fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        subprocess.run("pkill -f sgtk-menu", shell=True)
        sys.exit(2)

    global build_from_file
    parser = argparse.ArgumentParser(
        description="GTK menu for sway, i3 and some floating WMs")
    placement = parser.add_mutually_exclusive_group()
    placement.add_argument("-b",
                           "--bottom",
                           action="store_true",
                           help="display menu at the bottom (sway & i3 only)")
    placement.add_argument("-c",
                           "--center",
                           action="store_true",
                           help="center menu on the screen (sway & i3 only)")

    favourites = parser.add_mutually_exclusive_group()
    favourites.add_argument("-f",
                            "--favourites",
                            action="store_true",
                            help="prepend 5 most used items")
    favourites.add_argument('-fn',
                            type=int,
                            help="prepend <FN> most used items")

    appendix = parser.add_mutually_exclusive_group()
    appendix.add_argument(
        "-a",
        "--append",
        action="store_true",
        help="append custom menu from {}".format(build_from_file))
    appendix.add_argument("-af",
                          type=str,
                          help="append custom menu from {}".format(
                              os.path.join(config_dir, '<AF>')))

    parser.add_argument("-n",
                        "--no-menu",
                        action="store_true",
                        help="skip menu, display appendix only")
    parser.add_argument("-l",
                        type=str,
                        help="force language (e.g. \"de\" for German)")
    parser.add_argument("-s",
                        type=int,
                        default=20,
                        help="menu icon size (min: 16, max: 48, default: 20)")
    parser.add_argument(
        "-w",
        type=int,
        help="menu width in px (integer, default: screen width / 8)")
    parser.add_argument(
        "-d",
        type=int,
        default=100,
        help="menu delay in milliseconds (default: 100; sway & i3 only)")
    parser.add_argument(
        "-o",
        type=float,
        default=0.3,
        help="overlay opacity (min: 0.0, max: 1.0, default: 0.3; "
        "sway & i3 only)")
    parser.add_argument("-t",
                        type=int,
                        default=30,
                        help="sway submenu lines limit (default: 30)")
    parser.add_argument(
        "-y",
        type=int,
        default=0,
        help="y offset from edge to display menu at (sway & i3 only)")
    parser.add_argument(
        "-css",
        type=str,
        default="style.css",
        help="use alternative {} style sheet instead of style.css".format(
            os.path.join(config_dir, '<CSS>')))
    global args
    args = parser.parse_args()
    css_file = os.path.join(config_dirs()[0], args.css) if os.path.exists(
        os.path.join(config_dirs()[0], 'style.css')) else None

    if args.s < 16:
        args.s = 16
    elif args.s > 48:
        args.s = 48

    # We do not need any delay in other WMs
    if other_wm:
        args.d = 0

    # Create default config files if not found
    create_default_configs(config_dir)

    # Replace appendix file name with custom - if any
    if args.af:
        build_from_file = os.path.join(config_dirs()[0], args.af)

    if css_file:
        screen = Gdk.Screen.get_default()
        provider = Gtk.CssProvider()
        try:
            provider.load_from_path(css_file)
            Gtk.StyleContext.add_provider_for_screen(
                screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)
        except Exception as e:
            print(e)

    # cache stores number of clicks on each item
    global cache
    cache = load_json(cache_file)

    if not cache:
        save_json(cache, cache_file)
    global sorted_cache
    sorted_cache = sorted(cache.items(), reverse=True, key=lambda x: x[1])

    global locale
    locale = get_locale_string(args.l)
    category_names_dictionary = localized_category_names(locale)

    # replace additional category names with main ones
    for name in category_names:
        main_category_name = additional_to_main(name)
        try:
            localized_names_dictionary[
                main_category_name] = category_names_dictionary[
                    main_category_name]
        except:
            pass

    screen = Gdk.Screen.get_default()
    provider = Gtk.CssProvider()
    style_context = Gtk.StyleContext()
    style_context.add_provider_for_screen(
        screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)

    # find all .desktop entries, create DesktopEntry class instances;
    # DesktopEntry adds itself to the proper List in the class constructor
    list_entries()

    # Overlay window
    global win
    win = MainWindow()
    if other_wm:
        # We need this to obtain the screen geometry when the i3ipc module is unavailable
        win.resize(1, 1)
        win.show_all()
    global geometry
    # If we're not on sway or i3, this won't return values until the window actually shows up.
    # Let's try as many times as needed. The retries counter protects against an infinite loop.
    retries = 0
    while geometry[0] == 0 and geometry[1] == 0 and geometry[
            2] == 0 and geometry[3] == 0:
        geometry = display_geometry()
        retries += 1
        if retries > 500:
            print("\nFailed to get the current screen geometry, exiting...\n")
            sys.exit(2)
    x, y, w, h = geometry

    if not other_wm:
        win.resize(w, h)
    else:
        win.resize(1, 1)
        win.set_gravity(Gdk.Gravity.CENTER)
        if pynput:
            x, y = mouse_pointer.position
            win.move(x, y)
        else:
            win.move(0, 0)
            print("\nYou need the python-pynput package!\n")

    win.set_skip_taskbar_hint(True)
    win.menu = build_menu()
    win.menu.set_property("name", "menu")

    global menu_items_list
    menu_items_list = win.menu.get_children()

    win.menu.propagate_key_event = False
    win.menu.connect("key-release-event", win.search_items)
    # Let's reserve some width for long entries found with the search box
    if args.w:
        win.menu.set_property("width_request", args.w)
    else:
        win.menu.set_property("width_request",
                              int(win.screen_dimensions[0] / 8))
    win.show_all()

    GLib.timeout_add(args.d, open_menu)
    Gtk.main()
Example 21
def download_api_statuses(input: str,
                          config_file: str = None,
                          time_sleep: float = 1.1):
    """Goes to twitter API an get status info (hydratated) and saves into a json file (in "json" dir) and if Elasticsearch is identified send it too
    
    Arguments:
        input {str} -- [description]
        elasticsearch_url {str} -- [description]
    
    Keyword Arguments:
        elasticuser {str} -- [description] (default: {None})
        elasticpass {str} -- [description] (default: {None})
        elasticsearch_index {str} -- [description] (default: {STATUSES_INDEX})
    """
    settings = Settings()._load_config(config_file)

    # Create a connection with Elastic
    if settings.elasticsearch_url is not None:
        es = Elasticsearch(settings.elasticsearch_url)
        logger.info(es.info())
    else:
        es = None

    # Check if time_sleep is more than 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum) ")
        raise err

    # Read the MSGPACK or CSV file with status ids
    if ".msg" in input.lower():
        df = pd.read_msgpack(input)
    elif ".csv" in input.lower():
        df = pd.read_csv(input)

    api = twitter.Api(consumer_key=CONSUMER_KEY,
                      consumer_secret=CONSUMER_SECRET,
                      access_token_key=ACCESS_TOKEN_KEY,
                      access_token_secret=ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')

    all_statuses_id = df['STATUS_ID'].tolist()

    # Go to Twitter API and get statuses by id
    logger.info("Downloading Statuses from 🐦 API")
    all_statuses_data = hydratate_status(api, all_statuses_id, time_sleep)

    # Save all jsons to file and load into Elastic
    logger.info("Processing Statuses from 🐦 API to save jsons")

    for c_status_data in tqdm(all_statuses_data):
        cur_dict = Cut(c_status_data.AsDict())
        cur_id_str = cur_dict['id_str']

        # Fix twitter dates to a more 'standard' date format
        list_all_keys_w_dots = dotter(cur_dict.data, '', [])
        try:
            for created_at_keys in list_all_keys_w_dots:
                if 'created_at' in created_at_keys:
                    cur_dt = arrow.get(cur_dict[created_at_keys],
                                       TWITTER_DATETIME_PATTERN)
                    cur_dict[created_at_keys] = cur_dt.format(
                        "YYYY-MM-DDTHH:MM:SS") + "Z"
        except:
            logger.error("Error parsing dates on %s" % cur_id_str)

        # To ES improved data
        cur_json = _prepare_json_status(c_status_data)
        # TO FS original data backup
        cur_json_backup = json.dumps(c_status_data.AsDict(), indent=4)

        save_json(cur_json_backup,
                  settings.status_json_backup + cur_id_str + ".json")
        if es is not None:
            es.index(index=elasticsearch_index,
                     doc_type='status',
                     id=cur_id_str,
                     body=cur_json)
Example 22
def init_crawler(params):
    """ 初始化爬取工作
    :param params:
    :return:
    """
    # center points for each map sheet
    request_points = OrderedDict()

    # coordinates of the four corners
    nepoint = params["north_east_point"]
    # sepoint = params["south_east_point"]
    swpoint = params["south_west_point"]
    # nwpoint = params["north_west_point"]
    # boundaries
    s_boundary, w_boundary = swpoint
    n_boundary, e_boundary = nepoint
    s_boundary = int(s_boundary)
    w_boundary = int(w_boundary)
    n_boundary = int(n_boundary + 0.5)
    e_boundary = int(e_boundary + 0.5)
    print s_boundary
    print w_boundary
    print n_boundary
    print e_boundary

    if abs(n_boundary - s_boundary) <= 1 and abs(e_boundary -
                                                 w_boundary) <= 1:  # small area
        center_point = params["center_point"]  # take the center point
        value = {}
        value["req_pnt"] = center_point  # request point

        imgs = do_get(*center_point)  # request with the center point to check it works
        # if the sheet is valid and has an extent
        if imgs is not None and len(imgs) > 0 and len(imgs[0]) >= 3:
            value["extent"] = imgs[0][2]
            request_points["0,0"] = value
        else:  # not valid, so len(request_points) stays 0
            pass
    else:  # large area
        # split into map sheets of 1° each
        row = 0
        for x in range(s_boundary, n_boundary, 1):
            col = 0
            for y in range(w_boundary, e_boundary, 1):
                sheet_num = str(row) + ',' + str(col)
                value = {}
                value["req_pnt"] = (x, y)
                imgs = do_get(x, y)
                # if the sheet is valid and has an extent
                if imgs is not None and len(imgs) > 0 and len(imgs[0]) >= 3:
                    value["extent"] = imgs[0][2]
                else:
                    value["extent"] = None
                request_points[sheet_num] = value
                col += 1
            row += 1

        save_path = save_json(params["out_dir"],
                              u"1 request_points - 1度为步长的centerpoint与extent",
                              request_points)
        print u"[FILE] 1度为步长的centerpoint与extent:{}".format(save_path)

        # de-duplicate request points by extent
        prior_extent = None
        for key, value in request_points.items():
            if prior_extent is None:
                prior_extent = value["extent"]
                continue
            else:
                now_extent = value["extent"]
                if prior_extent == now_extent:
                    request_points.pop(key)
                else:
                    prior_extent = now_extent

    # count the valid request_points (only entries with extent not None are valid)
    img_len = 0
    for key, value in request_points.items():
        if value["extent"] is not None:
            img_len += 1
        else:
            continue

    save_path = save_json(params["out_dir"], u"2 request_points - 每次爬取的中心点坐标",
                          request_points)
    print u"[FILE] 每次爬取的中心点坐标:{}".format(save_path)
    print "【进程】每次需要爬取{}张图片".format(img_len)

    return img_len, request_points
Example 23
 def log_url_info(self, url_info):
     url_info_filename = os.path.join(self.log_folder, 'url_info.json')
     tools.save_json(url_info_filename, url_info)