def get_all_set_named_deps(limit=10, use_rq="rq"):
    """Enqueue `set_named_deps` jobs for repos that don't have named_deps yet.

    Queries up to `limit` (login, repo_name) pairs where named_deps is NULL
    and hands them to enqueue_jobs on queue number 5.
    """
    # SQLAlchemy needs `== None` here (emits IS NULL); don't change to `is None`.
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.named_deps == None)
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_named_deps", query, 5, use_rq)
def set_all_requirements_pypi(q_limit=9500, use_rq="rq"):
    """Enqueue `set_requirements_pypi` jobs for repos with requirements but no pypi mapping.

    q_limit defaults to 9500 because there are roughly 10 API keys at
    5000 requests each, so the batch is kept under the combined quota.
    """
    # `== None` / `!= []` are SQLAlchemy column comparisons, not Python ones.
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.requirements_pypi == None)
             .filter(GithubRepo.requirements != [])
             .order_by(GithubRepo.login)
             .limit(q_limit))
    enqueue_jobs(GithubRepo, "set_requirements_pypi", query, 7, use_rq)
def set_all_pypi_dependencies(q_limit=100, use_rq='rq'):
    """Enqueue `set_pypi_dependencies` jobs for python repos.

    Targets repos that already have dependency_lines extracted but whose
    pypi_dependencies column is still NULL.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.dependency_lines != None)
             .filter(GithubRepo.pypi_dependencies == None)
             .filter(GithubRepo.language == "python")
             .order_by(GithubRepo.login)
             .limit(q_limit))
    enqueue_jobs(GithubRepo, "set_pypi_dependencies", query, 6, use_rq)
def set_all_cran_descr_file_names(limit=10, use_rq="rq"):
    """Enqueue `set_cran_descr_file_name` jobs for repos with a usable CRAN DESCRIPTION file.

    Skips rows where cran_descr_file is NULL or the sentinel "not_found".
    """
    # The fork filter (api_raw.contains({"fork": False})) was applied when the
    # table was populated, per the original author's note, so it's omitted here.
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.cran_descr_file != None)
             .filter(GithubRepo.cran_descr_file != "not_found")
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_cran_descr_file_name", query, 9, use_rq)
def get_all_setup_py_no_forks(limit=10, use_rq="rq"):
    """Enqueue `set_setup_py_no_forks` jobs for non-fork repos with a reqs file.

    NOTE(review): an identical function is defined again later in this file
    and shadows this one at import time — one of the two should be removed.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.reqs_file != None)
             .filter(GithubRepo.setup_py_no_forks == None)
             .filter(GithubRepo.api_raw.contains({"fork": False}))
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_setup_py_no_forks", query, 8, use_rq)
def set_all_cran_descr_file(limit=10, use_rq="rq"):
    """Enqueue `set_cran_descr_file` jobs for non-fork R repos missing cran_descr_file."""
    # api_raw is a JSON(B) column; .contains filters on the "fork" key.
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.api_raw.contains({"fork": False}))
             .filter(GithubRepo.language == 'r')
             .filter(GithubRepo.cran_descr_file == None)
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_cran_descr_file", query, 3, use_rq)
def get_all_setup_py_no_forks(limit=10, use_rq="rq"):
    """Enqueue `set_setup_py_no_forks` jobs for non-fork repos with a reqs file.

    NOTE(review): this redefines the function of the same name earlier in the
    file (same behavior); the earlier copy is dead and should be deleted.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.reqs_file != None)
             .filter(GithubRepo.setup_py_no_forks == None)
             .filter(GithubRepo.api_raw.contains({"fork": False}))
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_setup_py_no_forks", query, 8, use_rq)
def set_all_setup_py_names(limit=10, use_rq="rq"):
    """Enqueue `set_setup_py_name` jobs for non-fork repos with setup_py_no_forks set."""
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.setup_py_no_forks != None)
             # bucket IS NULL is only a speed optimization per the original author
             .filter(GithubRepo.bucket == None)
             .filter(GithubRepo.api_raw.contains({"fork": False}))
             .order_by(GithubRepo.login)
             .limit(limit))
    enqueue_jobs(GithubRepo, "set_setup_py_name", query, 1, use_rq)
def set_all_requirements(q_limit=9500):
    """Enqueue `set_requirements` jobs for python repos not yet tried.

    q_limit defaults to 9500 because there are roughly 10 API keys at
    5000 requests each, so the batch stays under the combined quota.

    NOTE(review): this uses the older enqueue_jobs(query, fn, 0) call shape,
    unlike the (Model, "method", query, n, use_rq) shape used above — presumably
    a different overload or legacy helper; verify before unifying.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.reqs_file_tried == None)
             .filter(GithubRepo.language == "python")
             .order_by(GithubRepo.login)
             .limit(q_limit))
    return enqueue_jobs(query, set_requirements, 0)
def set_all_cran_dependencies(q_limit=100):
    """Enqueue `set_cran_dependencies` jobs for R repos with dependency lines but no CRAN deps."""
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.dependency_lines != None)
             .filter(GithubRepo.cran_dependencies == None)
             .filter(GithubRepo.language == "r")
             .order_by(GithubRepo.login)
             .limit(q_limit))
    return enqueue_jobs(query, set_cran_dependencies, 0)
def set_all_zip_filenames(q_limit=100):
    """Enqueue `set_zip_filenames` jobs for repos whose zip listing hasn't been attempted.

    Excludes rows whose api_raw JSON carries an "error_code" key and rows
    that previously failed to download their zip.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(~GithubRepo.api_raw.has_key('error_code'))
             .filter(GithubRepo.zip_download_error == None)
             .filter(GithubRepo.zip_filenames_tried == None)
             .order_by(GithubRepo.login)
             .limit(q_limit))
    return enqueue_jobs(query, set_zip_filenames, 0)
def add_all_github_dependency_lines(q_limit=100):
    """Enqueue `add_github_dependency_lines` jobs for repos never zip-downloaded.

    zip_download_elapsed IS NULL marks repos whose download hasn't run yet;
    repos with a recorded download error are skipped.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(~GithubRepo.api_raw.has_key('error_code'))
             .filter(GithubRepo.zip_download_error == None)
             .filter(GithubRepo.zip_download_elapsed == None)
             .order_by(GithubRepo.login)
             .limit(q_limit))
    return enqueue_jobs(query, add_github_dependency_lines, 0)
def add_all_r_github_dependency_lines(q_limit=100):
    """Enqueue `add_github_dependency_lines` jobs for R repos missing dependency lines.

    Like add_all_github_dependency_lines but filtered to language 'r' and to
    rows where dependency_lines is still NULL.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.dependency_lines == None)
             .filter(GithubRepo.zip_download_error == None)
             .filter(GithubRepo.zip_download_elapsed == None)
             .filter(GithubRepo.language == 'r')
             .order_by(GithubRepo.login)
             .limit(q_limit))
    return enqueue_jobs(query, add_github_dependency_lines, 0)
def set_all_pypi_in_formal_only(q_limit=9500, run_mode='with_rq'):
    """Run or enqueue `set_pypi_in_formal_only` for repos with both pypi and formal deps.

    q_limit defaults to 9500 because there are roughly 10 API keys at
    5000 requests each. With run_mode 'with_rq' the rows are enqueued;
    otherwise each row is processed synchronously in-process.

    NOTE(review): an identical function is defined again immediately after
    this one and shadows it at import time — one copy should be removed.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.requirements_pypi != [])
             .filter(GithubRepo.pypi_dependencies != [])
             .order_by(GithubRepo.login)
             .limit(q_limit))
    if run_mode == 'with_rq':
        return enqueue_jobs(query, set_pypi_in_formal_only, 0)
    for login, repo_name in query.all():
        set_pypi_in_formal_only(login, repo_name)
def set_all_pypi_in_formal_only(q_limit=9500, run_mode='with_rq'):
    """Run or enqueue `set_pypi_in_formal_only` for repos with both pypi and formal deps.

    q_limit defaults to 9500 because there are roughly 10 API keys at
    5000 requests each. With run_mode 'with_rq' the rows are enqueued;
    otherwise each row is processed synchronously in-process.

    NOTE(review): this redefines the function of the same name just above
    (same behavior); the earlier copy is dead and should be deleted.
    """
    query = (db.session.query(GithubRepo.login, GithubRepo.repo_name)
             .filter(GithubRepo.requirements_pypi != [])
             .filter(GithubRepo.pypi_dependencies != [])
             .order_by(GithubRepo.login)
             .limit(q_limit))
    if run_mode == 'with_rq':
        return enqueue_jobs(query, set_pypi_in_formal_only, 0)
    for login, repo_name in query.all():
        set_pypi_in_formal_only(login, repo_name)