Ejemplo n.º 1
0
def refactor_dockerfile(src, dst):
    """
    read the text of an old dockerfile, wrap it in a dict under the key
    "contents" and write that dict to the new dockerfiles directory
    :param src: path of the old dockerfile, relative to old_dockerfiles_dir
    :param dst: path of the new dockerfile, relative to new_dockerfiles_dir
    :return: None
    """
    source_path = os.path.join(old_dockerfiles_dir, src)
    # the whole text of the old file becomes the single "contents" entry
    wrapped = {"contents": ch.read_text_from_file(source_path)}
    target_path = os.path.join(new_dockerfiles_dir, dst)
    ch.write_object_to_file(target_path, wrapped)
Ejemplo n.º 2
0
def get_available_proxy_and_write_to_file(target_url):
    """
    call get_available_proxy_addresses with proxy_config.proxy_url,
    target_url and proxy_config.proxy_headers, and write the result to
    proxy_config.proxy_path
    :param target_url: passed through to get_available_proxy_addresses
    :return: None
    """
    start_message = "start get proxy..."
    logging.info(start_message)
    print(start_message)

    found_proxies = get_available_proxy_addresses(
        proxy_config.proxy_url,
        target_url,
        proxy_config.proxy_headers)

    output_path = proxy_config.proxy_path
    crawler_helper.write_object_to_file(output_path, found_proxies)

    # progress is reported both to the log and to stdout
    logging.info("write proxy to file [%s]", output_path)
    print("done get proxy [%s]" % output_path)
Ejemplo n.º 3
0
def write_save_user_ids_to_file():
    """
    merge save_user_ids into the ids already stored at cc.user_ids_path and
    write the union back to the same file
    :return: None
    """
    stored_ids = ch.read_object_from_file(cc.user_ids_path)
    # The None check has to happen BEFORE the set() conversion: set(None)
    # raises TypeError, and a set is never None, so checking afterwards can
    # never take effect.
    # NOTE(review): presumably read_object_from_file returns None when nothing
    # has been stored yet -- confirm against the helper.
    if stored_ids is None:
        stored_ids = ()
    # With nothing stored, the result is just save_user_ids, so the freshly
    # collected ids are never dropped.
    merged_ids = set(stored_ids).union(save_user_ids)
    ch.write_object_to_file(cc.user_ids_path, list(merged_ids))
Ejemplo n.º 4
0
def crawler_statuses_and_write_to_file():
    """
    crawl the statuses of every user id stored at cc.user_ids_path and write
    each user's statuses to "statuses_<user_id>.json" in cc.statuses_dir
    users whose statuses come back as None are logged and skipped
    :return: None
    """
    user_ids = ch.read_object_from_file(cc.user_ids_path)
    if user_ids is None:
        # NOTE(review): presumably None means the ids file could not be read
        # -- confirm against the helper. Iterating None would raise TypeError.
        logging.warning("user_ids is None, path: [%s]", cc.user_ids_path)
        return
    for user_id in user_ids:
        statuses = get_statuses_by_user_id(user_id=user_id)
        if statuses is None:
            # logging.warning replaces the deprecated logging.warn alias; the
            # argument is passed lazily instead of being pre-formatted
            logging.warning("statuses is None, user_id: [%s]", user_id)
            continue
        file_name = "statuses_%s.json" % user_id
        path = os.path.join(cc.statuses_dir, file_name)
        ch.write_object_to_file(path, statuses)
Ejemplo n.º 5
0
def get_available_proxy_and_write_to_file(target_url, proxy_num=100,
                                          max_pages=None):
    """
    collect proxy addresses page by page until at least proxy_num of them
    are gathered, then write them all to proxy_config.proxy_path
    :param target_url: passed through to get_available_proxy_addresses
    :param proxy_num: stop once this many distinct addresses are collected
    :param max_pages: optional upper bound on the number of pages requested;
                      None (the default) keeps the original unbounded loop
    :return: None
    """
    # Local import: the page url is a URL, not a filesystem path.
    # os.path.join would insert a backslash on Windows; posixpath.join always
    # joins with "/" and gives the same result os.path.join gives on POSIX.
    import posixpath

    logging.info("start get proxy...")
    print("start get proxy...")

    all_proxy_addresses = {}
    proxy_page = 0
    while len(all_proxy_addresses) < proxy_num:
        if max_pages is not None and proxy_page >= max_pages:
            # without a bound the loop never ends once pages stop yielding
            # new addresses
            logging.warning(
                "stop get proxy after [%s] pages, got [%s] of [%s]",
                proxy_page, len(all_proxy_addresses), proxy_num)
            break
        proxy_page += 1
        proxy_url = posixpath.join(proxy_config.proxy_base_url,
                                   str(proxy_page))
        proxy_addresses = get_available_proxy_addresses(
            proxy_url, target_url, proxy_config.proxy_headers)
        # addresses already seen on an earlier page are overwritten, so only
        # distinct keys count towards proxy_num
        all_proxy_addresses.update(proxy_addresses)
    crawler_helper.write_object_to_file(proxy_config.proxy_path,
                                        all_proxy_addresses)
    logging.info("done get proxy [%s]", proxy_config.proxy_path)
    print("done get proxy [%s]" % proxy_config.proxy_path)
Ejemplo n.º 6
0
def refactor_dockerfiles_database(refactor_paths_file=None):
    """
    rename every dockerfile_path ending in ".txt" that is stored in
    docker_manager.DockerManager_dockerversion to the name produced by
    ch.generate_dockerfile_fname, and write the list of {"src", "dst"}
    path pairs to a file
    only the database rows are changed here; the dockerfiles on disk are not
    touched by this function
    :param refactor_paths_file: file the src/dst pairs are written to; None
                                (the default) keeps the original location
    :return: None
    """
    if refactor_paths_file is None:
        refactor_paths_file = "D:\\IdeaProjects\\DockerManagerSystem\\docker-manager-system\\data\\refactor_dockerfiles.json"

    select_sql = """select id, dockerfile_path from docker_manager.DockerManager_dockerversion where dockerfile_path like "%.txt" """

    # read phase: cursor and connection are released even if the query fails
    conn = mh.get_database_connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_sql)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    # every old path gets the same placeholder timestamp
    timestamp = "1970-01-01-00-00-00"
    update_sqls = list()
    refactor_paths = list()
    for row in rows:
        docker_version_id = row[0]
        dockerfile_path = row[1]
        # drop the 4-character ".txt" suffix
        dockerfile_path_strip = str(dockerfile_path)[0:-4]
        # at most 3 splits on "/" or "_"; pieces 2 and 3 form the docker name
        # NOTE(review): raises IndexError when the path has fewer than three
        # separators -- confirm every stored path has that shape
        names = re.split("/|_", dockerfile_path_strip, 3)
        print(names)
        docker_name = "%s/%s" % (names[2], names[3])
        new_dockerfile_path = ch.generate_dockerfile_fname(
            docker_name, timestamp=timestamp)
        refactor_paths.append({"src": dockerfile_path,
                               "dst": new_dockerfile_path})
        # NOTE(review): the statement is built by string formatting; a quote
        # in the generated path would break it. Switch to driver parameters
        # once the paramstyle of mh's connection is confirmed.
        update_sql = """update docker_manager.DockerManager_dockerversion set dockerfile_path = "%s" where id = %s; """ \
                     % (new_dockerfile_path, docker_version_id)
        update_sqls.append(update_sql)

    ch.write_object_to_file(refactor_paths_file, refactor_paths)
    print(len(refactor_paths))

    # write phase: all renames run in one transaction; on any failure the
    # transaction is rolled back and the error is re-raised
    conn = mh.get_database_connection()
    try:
        update_cur = conn.cursor()
        try:
            update_cur.execute("begin;")
            for update_sql in update_sqls:
                print(update_sql)
                update_cur.execute(update_sql)
            update_cur.execute("commit;")
        except Exception:
            conn.rollback()
            raise
        finally:
            update_cur.close()
    finally:
        # the original never closed this second connection
        conn.close()
Ejemplo n.º 7
0
def incremental_crawler(increment_data=None):
    """
    crawl the full info of every docker named in increment_data, dump each
    one to "./<docker_name>_full_info.json" (with "/" replaced by "#") and
    hand it to _write_docker_full_info
    :param increment_data: a dict(str:docker_name, bool:insert); when None it
                           is loaded with _get_increment_data_database()
    :return: None
    """
    pending = increment_data
    if pending is None:
        pending = _get_increment_data_database()

    for docker_name, insert in pending.items():
        print("Crawler: %s" % docker_name)
        full_info = _crawler_docker_full_info_with_name(docker_name)

        # "/" cannot be part of a file name, so it is swapped for "#"
        dump_path = "./%s_full_info.json" % docker_name.replace("/", "#")
        ch.write_object_to_file(dump_path, full_info.__dict__)

        _write_docker_full_info(docker_full_info=full_info, insert=insert)
Ejemplo n.º 8
0
def generate_available_docker_names_and_write_to_file_for_stars_and_pulls():
    """
    split the docker names returned by get_all_docker_names_database() by
    whether ch.get_docker_json_from_file finds a docker json for them, and
    write both lists to their cc.*_for_stars_and_pulls paths
    :return: the list of docker names for which a docker json was found
    """
    found_names = []
    missing_names = []
    for docker_name in get_all_docker_names_database():
        has_docker_json = ch.get_docker_json_from_file(docker_name) is not None
        bucket = found_names if has_docker_json else missing_names
        bucket.append(docker_name)

    # the unavailable list is written first, then the available one
    ch.write_object_to_file(
        cc.unavailable_docker_names_for_db_path_for_stars_and_pulls,
        missing_names)
    ch.write_object_to_file(
        cc.available_docker_names_for_db_path_for_stars_and_pulls,
        found_names)
    return found_names
Ejemplo n.º 9
0
def _write_docker_full_info(docker_full_info, insert=False):
    """
    write the docker info to the database and its dockerfile to disk
    the database statement is executed first; the dockerfile is then written
    to the current directory under the name given by
    ch.generate_dockerfile_fname
    :param docker_full_info: an object whose __dict__ is used to build a
                             DockerFullInfo
    :param insert: True means insert a docker, and False means update a docker
    :return: None
    """
    # rebuild the info as a DockerFullInfo from the attributes of the argument
    info = DockerFullInfo(docker_full_info.__dict__)

    statement = (_generate_insert_sql(info) if insert
                 else _generate_update_sql(info))
    mh.execute_sqls([statement])
    print("Write done: docker full info to database")

    target_path = os.path.join(
        "./", ch.generate_dockerfile_fname(info.docker_name))
    ch.write_object_to_file(target_path, info.dockerfile)
Ejemplo n.º 10
0
def classify_available_docker_names_and_write_to_file():
    """
    split the available docker names into those already present in the
    database (updated) and those not yet present (new), and write both
    lists to file for generating sql
    :return: available_updated_docker_names, available_new_docker_names
    """
    available_names = set(
        ch.read_object_from_file(cc.available_docker_names_for_db_path))
    database_names = set(get_all_docker_names_database())

    # in both sets -> update; only in the available set -> insert
    updated_names = list(available_names.intersection(database_names))
    new_names = list(available_names.difference(database_names))

    ch.write_object_to_file(cc.available_new_docker_names_for_db_path,
                            new_names)
    ch.write_object_to_file(cc.available_updated_docker_names_for_db_path,
                            updated_names)
    return updated_names, new_names
Ejemplo n.º 11
0
def get_all_available_docker_names_and_write_to_file():
    """
    check which dockers are available: a docker counts as available only
    when its docker json, dockerfile json, versions json and tags are all
    found; both the available and the unavailable names are written to file
    :return: the list of available docker names
    """
    available = []
    unavailable = []
    for docker_name in ch.get_all_docker_names():
        # all four lookups are always performed, in this order
        pieces = (
            ch.get_docker_json_from_file(docker_name),
            ch.get_dockerfile_json_from_file(docker_name),
            ch.get_docker_versions_json_file(docker_name),
            ch.get_docker_tags_from_file(docker_name),
        )
        if any(piece is None for piece in pieces):
            unavailable.append(docker_name)
        else:
            available.append(docker_name)

    ch.write_object_to_file(cc.unavailable_docker_names_for_db_path,
                            unavailable)
    ch.write_object_to_file(cc.available_docker_names_for_db_path,
                            available)
    return available
Ejemplo n.º 12
0
def write_images_size_and_count_to_file(size, count):
    """
    write the images size and count to cc.images_size_count_path
    :param size: stored under the key "images_size"
    :param count: stored under the key "images_count"
    :return: None
    """
    summary = dict(images_size=size, images_count=count)
    ch.write_object_to_file(cc.images_size_count_path, summary)