def refactor_dockerfile(src, dst):
    """
    Read a raw dockerfile text file from the old dockerfiles directory and
    save it into the new directory as a json object: {"contents": <text>}.

    :param src: file name under old_dockerfiles_dir (module-level global)
    :param dst: file name under new_dockerfiles_dir (module-level global)
    :return: None
    """
    source_path = os.path.join(old_dockerfiles_dir, src)
    # names = filename.split("_")
    wrapped = {"contents": ch.read_text_from_file(source_path)}
    target_path = os.path.join(new_dockerfiles_dir, dst)
    ch.write_object_to_file(target_path, wrapped)
def get_available_proxy_and_write_to_file(target_url):
    """
    Fetch proxy addresses that can reach target_url and persist them to the
    configured proxy file.

    :param target_url: url the proxies must be able to reach
    :return: None
    """
    # return
    logging.info("start get proxy...")
    print("start get proxy...")
    addresses = get_available_proxy_addresses(
        proxy_config.proxy_url, target_url, proxy_config.proxy_headers)
    crawler_helper.write_object_to_file(proxy_config.proxy_path, addresses)
    logging.info("write proxy to file [%s]", proxy_config.proxy_path)
    print("done get proxy [%s]" % proxy_config.proxy_path)
def write_save_user_ids_to_file():
    """
    Merge the crawled user ids (module-level global save_user_ids) with the
    ids already stored on disk and write the union back to cc.user_ids_path.

    Bug fix: the original called set(old_ids) *before* checking
    `old_ids is not None`, so a missing/empty file raised TypeError — and
    the (dead) None branch would have discarded save_user_ids entirely.
    :return: None
    """
    old_ids = ch.read_object_from_file(cc.user_ids_path)
    # start from the freshly crawled ids so they are kept even when no
    # previous file exists
    user_ids = set(save_user_ids)
    if old_ids is not None:
        user_ids |= set(old_ids)
    ch.write_object_to_file(cc.user_ids_path, list(user_ids))
def crawler_statuses_and_write_to_file():
    """
    Crawl statuses for every known user id and write each user's statuses
    to its own json file ("statuses_<user_id>.json") under cc.statuses_dir.

    :return: None
    """
    user_ids = ch.read_object_from_file(cc.user_ids_path)
    if not user_ids:
        # missing or empty ids file: the original crashed iterating None
        logging.warning("no user ids found in [%s]", cc.user_ids_path)
        return
    for user_id in user_ids:
        statuses = get_statuses_by_user_id(user_id=user_id)
        if statuses is None:
            # logging.warn is deprecated; use warning() with lazy %-args
            logging.warning("statuses is None, user_id: [%s]", user_id)
            continue
        file_name = "statuses_%s.json" % user_id
        path = os.path.join(cc.statuses_dir, file_name)
        ch.write_object_to_file(path, statuses)
def get_available_proxy_and_write_to_file(target_url, proxy_num=100):
    """
    Page through the proxy source until at least proxy_num working proxies
    are collected, then persist them to the configured proxy file.

    Fixes: (1) the loop never terminated when a page yielded no proxies;
    (2) os.path.join was used to build a URL, which emits "\\" separators
    on Windows (this project runs on Windows, see the D:\\ paths elsewhere).

    :param target_url: url the proxies must be able to reach
    :param proxy_num: minimum number of proxies to collect (default 100)
    :return: None
    """
    # return
    logging.info("start get proxy...")
    print("start get proxy...")
    all_proxy_addresses = {}
    proxy_page = 0
    while len(all_proxy_addresses) < proxy_num:
        proxy_page += 1
        # URLs always use "/" regardless of OS
        proxy_url = "%s/%s" % (proxy_config.proxy_base_url.rstrip("/"), proxy_page)
        proxy_addresses = get_available_proxy_addresses(
            proxy_url, target_url, proxy_config.proxy_headers)
        if not proxy_addresses:
            # source exhausted: stop instead of looping forever
            logging.warning("no proxies on page %s, stopping early", proxy_page)
            break
        all_proxy_addresses.update(proxy_addresses)
    crawler_helper.write_object_to_file(proxy_config.proxy_path, all_proxy_addresses)
    logging.info("done get proxy [%s]", proxy_config.proxy_path)
    print("done get proxy [%s]" % proxy_config.proxy_path)
def refactor_dockerfiles_database():
    # One-shot migration: renames every ".txt" dockerfile_path row in the
    # DockerManager_dockerversion table to the new generated file-name
    # scheme, and records the old->new path mapping in a json file so the
    # on-disk files can be moved afterwards (that step is disabled below).
    # NOTE(review): runs all UPDATEs in a single begin/commit transaction.
    sql = """select id, dockerfile_path from docker_manager.DockerManager_dockerversion where dockerfile_path like "%.txt" """
    conn = mh.get_database_connection()
    cur = conn.cursor()
    cur.execute(sql)
    sqls = list()            # UPDATE statements to run later in one transaction
    refactor_paths = list()  # [{"src": old_path, "dst": new_path}, ...]
    # print cur.fetchall().__len__()
    for row in cur.fetchall():
        docker_version_id = row[0]
        dockerfile_path = row[1]
        # drop the trailing ".txt" (4 chars)
        dockerfile_path_strip = str(dockerfile_path)[0:-4]
        # split on "/" or "_", at most 3 splits -> presumably
        # names[2]/names[3] are the repo/image parts; verify against data
        names = re.split("/|_", dockerfile_path_strip, 3)
        print names
        docker_name = "%s/%s" % (names[2], names[3])
        # sentinel timestamp for migrated rows (epoch)
        timestamp = "1970-01-01-00-00-00"
        new_dockerfile_path = ch.generate_dockerfile_fname(docker_name, timestamp=timestamp)
        refactor_path = dict()
        refactor_path["src"] = dockerfile_path
        refactor_path["dst"] = new_dockerfile_path
        refactor_paths.append(refactor_path)
        # NOTE(review): path and id are interpolated directly into SQL;
        # values are internally generated, but parameterized queries would
        # be safer
        sql = """update docker_manager.DockerManager_dockerversion set dockerfile_path = "%s" where id = %s; """ \
            % (new_dockerfile_path, docker_version_id)
        sqls.append(sql)
    cur.close()
    conn.close()
    # persist the mapping before touching anything, for manual recovery
    ch.write_object_to_file(
        "D:\\IdeaProjects\\DockerManagerSystem\\docker-manager-system\\data\\refactor_dockerfiles.json",
        refactor_paths)
    # refactor dockerfiles
    print refactor_paths.__len__()
    # for refactor_path in refactor_paths:
    #     refactor_dockerfile(refactor_path["src"], refactor_path["dst"])
    # rename dockerfile names
    conn = mh.get_database_connection()
    update_cur = conn.cursor()
    update_cur.execute("begin;")
    for sql in sqls:
        print sql
        update_cur.execute(sql)
    update_cur.execute("commit;")
    update_cur.close()
    pass
def incremental_crawler(increment_data=None):
    """
    Crawl every docker named in increment_data and persist the result
    (dockerfile to disk, remaining info to the database).

    :param increment_data: dict mapping docker_name (str) -> insert (bool);
                           when None it is fetched from the database
    :return: None
    """
    if increment_data is None:
        increment_data = _get_increment_data_database()
    for docker_name, insert in increment_data.items():
        print("Crawler: %s" % docker_name)
        full_info = _crawler_docker_full_info_with_name(docker_name)
        dump_path = "./%s_full_info.json" % docker_name.replace("/", "#")
        ch.write_object_to_file(dump_path, full_info.__dict__)
        _write_docker_full_info(docker_full_info=full_info, insert=insert)
def generate_available_docker_names_and_write_to_file_for_stars_and_pulls():
    """
    Split all docker names from the database into those with a docker json
    file on disk (available) and those without (unavailable), persist both
    lists, and return the available names.

    Cleanup: removed the unreachable `pass` after `return` and the
    redundant `continue` + `else` pair.

    :return: list of available docker names
    """
    all_docker_names = get_all_docker_names_database()
    available_docker_names = list()
    unavailable_docker_names = list()
    for docker_name in all_docker_names:
        docker = ch.get_docker_json_from_file(docker_name)
        if docker is None:
            unavailable_docker_names.append(docker_name)
        else:
            available_docker_names.append(docker_name)
    ch.write_object_to_file(
        cc.unavailable_docker_names_for_db_path_for_stars_and_pulls,
        unavailable_docker_names)
    ch.write_object_to_file(
        cc.available_docker_names_for_db_path_for_stars_and_pulls,
        available_docker_names)
    return available_docker_names
def _write_docker_full_info(docker_full_info, insert=False):
    """
    Write the dockerfile to disk and the remaining docker info to the database.

    Doc fix: the parameter is a single DockerFullInfo, not "a list of
    DockerFullInfo" as the original docstring claimed; also removed the
    dead trailing `pass`.

    :param docker_full_info: a DockerFullInfo instance for one docker
    :param insert: True inserts a new docker row, False updates an existing one
    :return: None
    """
    # re-wrap into a fresh DockerFullInfo — presumably normalizes the
    # fields when callers pass a duck-typed object; confirm with callers
    docker_full_info = DockerFullInfo(docker_full_info.__dict__)
    if insert:
        sql = _generate_insert_sql(docker_full_info)
    else:
        sql = _generate_update_sql(docker_full_info)
    mh.execute_sqls([sql])
    print("Write done: docker full info to database")
    dockerfile_fname = ch.generate_dockerfile_fname(
        docker_full_info.docker_name)
    dockerfile_path = os.path.join("./", dockerfile_fname)
    ch.write_object_to_file(dockerfile_path, docker_full_info.dockerfile)
def classify_available_docker_names_and_write_to_file():
    """
    Partition the available docker names into names already present in the
    database (updated) and names not yet in it (new), persist both lists,
    and return them — inputs for the subsequent sql generation step.

    Fix: guard against read_object_from_file returning None (missing file),
    which made set() raise TypeError; also removed the commented-out
    per-name database-lookup loop the set operations replaced.

    :return: (available_updated_docker_names, available_new_docker_names)
    """
    available = ch.read_object_from_file(cc.available_docker_names_for_db_path)
    available_all_docker_names_set = set(available or [])
    all_docker_names_db_set = set(get_all_docker_names_database() or [])
    available_updated_docker_names = list(available_all_docker_names_set
                                          & all_docker_names_db_set)
    available_new_docker_names = list(available_all_docker_names_set
                                      - all_docker_names_db_set)
    ch.write_object_to_file(cc.available_new_docker_names_for_db_path,
                            available_new_docker_names)
    ch.write_object_to_file(cc.available_updated_docker_names_for_db_path,
                            available_updated_docker_names)
    return available_updated_docker_names, available_new_docker_names
def get_all_available_docker_names_and_write_to_file():
    """
    Check every docker name for all four json artifacts on disk (docker,
    dockerfile, versions, tags); persist the available and unavailable
    name lists and return the available ones.

    Cleanup: the four separate None checks with a redundant continue/else
    are collapsed into one any()-over-artifacts test.

    :return: list of docker names that have every required json file
    """
    all_docker_names = ch.get_all_docker_names()
    available_docker_names = list()
    unavailable_docker_names = list()
    for docker_name in all_docker_names:
        artifacts = (
            ch.get_docker_json_from_file(docker_name),
            ch.get_dockerfile_json_from_file(docker_name),
            ch.get_docker_versions_json_file(docker_name),
            ch.get_docker_tags_from_file(docker_name),
        )
        # a docker is usable only when every artifact could be loaded
        if any(a is None for a in artifacts):
            unavailable_docker_names.append(docker_name)
        else:
            available_docker_names.append(docker_name)
    ch.write_object_to_file(cc.unavailable_docker_names_for_db_path,
                            unavailable_docker_names)
    ch.write_object_to_file(cc.available_docker_names_for_db_path,
                            available_docker_names)
    return available_docker_names
def write_images_size_and_count_to_file(size, count):
    """
    Persist the total images size and image count to
    cc.images_size_count_path as a single json object.

    :param size: total size of all images
    :param count: number of images
    :return: None
    """
    payload = {
        "images_size": size,
        "images_count": count,
    }
    ch.write_object_to_file(cc.images_size_count_path, payload)