def run_query(today, language='JavaScript'): """ Fetch all repositories for the given month """ # set up database database = connect_to_db() # fetch data and write to database last_node = None monthly_search_str = get_monthly_search_str(today) while True: try: result = run_query_once(MAX_NODES_PER_LOOP, monthly_search_str, last_node, language) write_db(database, result, language) if len(result['data']['search']['edges']) > 0: last_node = result['data']['search']['edges'][-1]['cursor'] else: break except ValueError as exc: print( f"Could not run query starting at {last_node} for {monthly_search_str}: {exc}: {result}" ) break except TypeError as exc: print( f"Could not run query starting at {last_node} for {monthly_search_str}: {exc}: {result}" ) break # tear down database connection database.close()
def run_query(): """ Enrich the package table with metadata for all packages in the table """ # Connect to the database database = connect_to_db() cur = database.cursor() # Fetch all package names from the database cur.execute("SELECT name FROM packages WHERE name LIKE 'pkg:npm/%';") results = [result[0] for result in cur.fetchall()] # Fetch all metadata for the packages from npmjs.com and write to the database for result in results: print(f"Fetching metadata for {result}") metadata = get_package_metadata(result) update_package_metadata(database, metadata['name'], metadata['monthly_downloads_last_month'], metadata['monthly_downloads_a_year_ago'], metadata['categories'], metadata['modified']) # Commit the changes to the database database.commit() # Clean up the database connection cur.close() database.close()
def test_connect_to_db(self): """ Try connecting to the database """ database = connect_to_db() self.assertIsNotNone(database)
def test_insert_to_packages(self): """ Try inserting a package into the package table """ database = connect_to_db() name = "myPkg" package_id = insert_to_package(database, name) self.assertIsInstance(package_id, int) cur = database.cursor() cur.execute(f"SELECT name FROM packages WHERE id = { package_id };") package_name = cur.fetchone()[0] self.assertEqual(package_name, name)
def test_insert_to_application(self): """ Try inserting an application into the application table """ database = connect_to_db() url = "www.pkgpkr.com" followers = 314 app_name = "pkgpkr" my_hash = hash(app_name) app_id = insert_to_app(database, url, followers, app_name, my_hash) self.assertIsInstance(app_id, int) cur = database.cursor() cur.execute(f"SELECT name FROM applications WHERE id = { app_id };") application_name = cur.fetchone()[0] self.assertEqual(application_name, app_name)
def test_update_package_metadata(self): """ Try to update the metadata associated with a package """ database = connect_to_db() name = "myPkg" downloads_last_month = 200 categories = ["critical", ",,comma", "\\{braces\\}", "\'quoted\""] modified = datetime.datetime.now() # Insert package into the table package_id = insert_to_package(database, name) self.assertIsInstance(package_id, int) # Ensure that the modified field is None cur = database.cursor() cur.execute( f"SELECT modified FROM packages WHERE id = { package_id };") modified_date = cur.fetchone()[0] self.assertIsNone(modified_date) # Update metadata in the table update_package_metadata(database, name, downloads_last_month, downloads_last_month, categories, modified) # Ensure that the modified field is now not None cur.execute( f"SELECT modified FROM packages WHERE id = { package_id };") modified_date = cur.fetchone()[0] self.assertIsNotNone(modified_date) # Upsert the same package into the table again package_id = insert_to_package(database, name) self.assertIsInstance(package_id, int) # Ensure that the modified field is still not None cur.execute( f"SELECT modified FROM packages WHERE id = { package_id };") modified_date = cur.fetchone()[0] self.assertIsNotNone(modified_date)
def test_insert_to_dependencies(self): """ Try to insert a dependency into the dependency table """ database = connect_to_db() url = "www.pkgpkr.com" followers = 314 app_name = "pkgpkr" my_hash = hash(app_name) application_id = insert_to_app(database, url, followers, app_name, my_hash) name = "myPkg" package_id = insert_to_package(database, name) insert_to_dependencies(database, application_id, package_id) cur = database.cursor() cur.execute( f"SELECT * FROM dependencies WHERE application_id = { application_id } AND package_id ={ package_id };" ) result = cur.fetchall() self.assertEqual(result, [(application_id, package_id)])