def save_language_executed_function_names(self, language, names):
    """Store the executed function names for a language, replacing any prior entry.

    The document is written to the "language_executed_functions" collection
    of ``self.dataset``. If a document with the same ``language`` value is
    already present it is deleted before the new one is inserted.

    Args:
        language: Value stored under the "language" key and used for lookup.
        names: Value stored under the "names" key.
    """
    coll = mongo_driver.get_collection(
        self.dataset, "language_executed_functions")
    collection_missing = not mongo_driver.is_collection_exists(coll)
    if collection_missing:
        # First use of the collection: set up the lookup index.
        mongo_driver.create_index_for_collection(coll, "language")
    already_stored = mongo_driver.contains_document(coll, "language", language)
    if already_stored:
        mongo_driver.delete_document(coll, "language", language)
    record = {"language": language, "names": names}
    coll.insert(record)
def store_normalized_stmt(self, stmt_dict):
    """Insert ``stmt_dict`` into the normalized-statement collection.

    When the collection is reported as not existing yet, a unique index over
    ("snippet", "language") is requested first. A duplicate-key failure on
    insert is ignored on purpose; any other exception propagates.

    Args:
        stmt_dict: Payload handed to ``insert`` unchanged.
    """
    target = mongo_driver.get_collection(self.dataset,
                                         STMT_NORMALIZED_COLLECTION)
    needs_index = not mongo_driver.is_collection_exists(target)
    if needs_index:
        mongo_driver.create_unique_index_for_collection(
            target, "snippet", "language")
    try:
        target.insert(stmt_dict, continue_on_error=True)
    except pymongo.errors.DuplicateKeyError:
        # Already stored; duplicates are expected and silently skipped.
        pass
def store_stmt(self, snippet, language, variables):
    """Insert one statement document into the statement collection.

    When the collection is reported as not existing yet, a unique index over
    ("snippet", "language") is requested first. Exceptions raised by the
    insert are not handled here and propagate to the caller.

    Args:
        snippet: Value stored under "snippet".
        language: Value stored under "language".
        variables: Value stored under "variables".
    """
    stmts = mongo_driver.get_collection(self.dataset, STMT_COLLECTION)
    first_use = not mongo_driver.is_collection_exists(stmts)
    if first_use:
        mongo_driver.create_unique_index_for_collection(
            stmts, "snippet", "language")
    document = {
        "snippet": snippet,
        "language": language,
        "variables": variables,
    }
    stmts.insert(document)
def store_file_stmts(self, file_name, snippets, language):
    """Insert the snippets of one file into the file-statement collection.

    When the collection is reported as not existing yet, a unique index on
    "file_name" is requested first. Exceptions raised by the insert are not
    handled here and propagate to the caller.

    Args:
        file_name: Value stored under "file_name".
        snippets: Value stored under "snippets".
        language: Value stored under "language".
    """
    file_stmts = mongo_driver.get_collection(self.dataset,
                                             FILE_STMT_COLLECTION)
    first_use = not mongo_driver.is_collection_exists(file_stmts)
    if first_use:
        mongo_driver.create_unique_index_for_collection(file_stmts,
                                                        "file_name")
    document = {
        "file_name": file_name,
        "snippets": snippets,
        "language": language,
    }
    file_stmts.insert(document)
def save_py_metadata(self, func_json):
    """Store function metadata in "py_functions_metadata", replacing by name.

    If a document with the same "name" is already present it is deleted
    before ``func_json`` is inserted.

    Args:
        func_json: Mapping to insert; its "name" entry is used for lookup.
    """
    metadata = mongo_driver.get_collection(self.dataset,
                                           "py_functions_metadata")
    first_use = not mongo_driver.is_collection_exists(metadata)
    if first_use:
        mongo_driver.create_index_for_collection(metadata, "name")
    has_previous = mongo_driver.contains_document(
        metadata, "name", func_json["name"])
    if has_previous:
        # Drop the stale entry so only the newest metadata remains.
        mongo_driver.delete_document(metadata, "name", func_json["name"])
    metadata.insert(func_json)
def save_py_function(self, function_json):
    """Store an executed Python function, falling back to the failed collection.

    The document goes to "test_py_functions_executed" when ``self.is_test``
    is truthy, otherwise to "py_functions_executed". If the insert raises,
    the "outputs" entry is removed from ``function_json`` (in place) and the
    remainder is handed to ``save_failed_py_function``.

    Args:
        function_json: Mapping describing the function. It is mutated when
            the fallback path is taken.
    """
    collection_name = ("test_py_functions_executed" if self.is_test
                       else "py_functions_executed")
    collection = mongo_driver.get_collection(self.dataset, collection_name)
    if not mongo_driver.is_collection_exists(collection):
        mongo_driver.create_index_for_collection(collection, "name")
    try:
        collection.insert(function_json)
    except Exception:
        # Deliberate best-effort fallback: any insert failure reroutes the
        # document to the "failed" collection without its outputs.
        # NOTE(review): presumably the outputs are what makes the insert
        # fail (size/encoding) - confirm.
        # pop() instead of del: a document without "outputs" must not turn
        # the fallback into a KeyError that hides the original failure.
        function_json.pop('outputs', None)
        self.save_failed_py_function(function_json)
def save_cloned_function_names(self, name, clones):
    """Store the clones of a function in "cloned_functions", replacing by name.

    If a document whose "_function_name_" equals ``name`` is already present
    it is deleted first. ``clones`` is then tagged with the function name and
    inserted.

    Args:
        name: Function name used as the lookup value.
        clones: Mapping to insert. It is modified in place: the
            "_function_name_" key is set to ``name``.
    """
    name_key = "_function_name_"
    cloned = mongo_driver.get_collection(self.dataset, "cloned_functions")
    first_use = not mongo_driver.is_collection_exists(cloned)
    if first_use:
        mongo_driver.create_index_for_collection(cloned, name_key)
    has_previous = mongo_driver.contains_document(cloned, name_key, name)
    if has_previous:
        mongo_driver.delete_document(cloned, name_key, name)
    clones[name_key] = name
    cloned.insert(clones)
def create_stmt_file_map(self, stmt, stmt_file_map, do_log=True):
    """Insert a statement together with its file map into the stmt-file collection.

    The stored document is ``{"snippet": stmt}`` updated with the entries of
    ``stmt_file_map``. A duplicate-key failure on insert is tolerated and
    optionally logged; any other exception propagates.

    Args:
        stmt: Value stored under "snippet".
        stmt_file_map: Mapping merged into the document (a "snippet" entry
            in it would override ``stmt``).
        do_log: When true, log the duplicate-key failure.
    """
    collection = mongo_driver.get_collection(self.dataset,
                                             STMT_FILE_COLLECTION)
    if not mongo_driver.is_collection_exists(collection):
        mongo_driver.create_index_for_collection(collection, "snippet")
    # Build the document outside the try block so that only the insert is
    # guarded by the duplicate-key handler.
    doc = {"snippet": stmt}
    doc.update(stmt_file_map)
    try:
        collection.insert(doc)
    except pymongo.errors.DuplicateKeyError as e:
        if do_log:
            # Log the exception itself: exceptions have no ``.message``
            # attribute on Python 3, so reading it would raise
            # AttributeError from inside this handler.
            LOGGER.warning("%s", e)
            LOGGER.info("We continue ... ")
def save_clusters(self, clusters, suffix):
    """Insert every cluster into the "clusters_<suffix>" collection.

    When the collection is reported as not existing yet, a unique index on
    "cluster_id" is requested first. One document is inserted per cluster,
    holding the cluster id and the functions converted with ``lib.to_json``.

    Args:
        clusters: Mapping of cluster id to a sized iterable of functions.
        suffix: Appended to "clusters_" to form the collection name.
    """
    target_name = "clusters_%s" % suffix
    coll = mongo_driver.get_collection(self.dataset, target_name)
    first_use = not mongo_driver.is_collection_exists(coll)
    if first_use:
        mongo_driver.create_unique_index_for_collection(coll, "cluster_id")
    for cluster_id, functions in clusters.items():
        size = len(functions)
        LOGGER.info("Saving cluster: '%d', with %d functions" % (cluster_id, size))
        serialized = [lib.to_json(f) for f in functions]
        coll.insert({"cluster_id": cluster_id, "functions": serialized})
def save_self_syntactic_differences(self, records, do_log=True):
    """Bulk-insert self syntactic difference records, tolerating duplicates.

    When the collection is reported as not existing yet, a unique index over
    ("id_1", "id_2", "language") and one index per distance field are
    requested first. Duplicate-key failures are tolerated and optionally
    logged; every other failure propagates.

    Args:
        records: Documents handed to ``insert_many``.
        do_log: When true, log tolerated duplicate-key failures.

    Raises:
        pymongo.errors.BulkWriteError: If the bulk insert failed for a
            reason other than duplicate keys.
    """
    collection = mongo_driver.get_collection(
        self.dataset, SELF_SYNTACTIC_DIFFERENCES_COLLECTION)
    if not mongo_driver.is_collection_exists(collection):
        mongo_driver.create_unique_index_for_collection(
            collection, "id_1", "id_2", "language")
        for distance_field in ("d_levenshtein", "d_jaro", "d_jaro_winkler",
                               "d_n_gram", "d_ast"):
            mongo_driver.create_index_for_collection(collection,
                                                     distance_field)
    try:
        # NOTE: insert_many is ordered by default, so records after the
        # first failing one are not inserted.
        collection.insert_many(records)
    except pymongo.errors.BulkWriteError as e:
        # insert_many reports duplicate keys as BulkWriteError rather than
        # DuplicateKeyError, so it has to be caught explicitly. Swallow it
        # only when every reported write error is a duplicate key.
        details = e.details or {}
        write_errors = details.get("writeErrors") or []
        duplicate_codes = (11000, 11001, 12582)
        only_duplicates = bool(write_errors) and all(
            error.get("code") in duplicate_codes for error in write_errors)
        if not only_duplicates or details.get("writeConcernErrors"):
            raise
        if do_log:
            LOGGER.warning("%s", e)
            LOGGER.info("We continue ... ")
    except pymongo.errors.DuplicateKeyError as e:
        if do_log:
            # Log the exception itself: ``e.message`` does not exist on
            # Python 3 and would raise AttributeError here.
            LOGGER.warning("%s", e)
            LOGGER.info("We continue ... ")
def save_difference(self, r_id, py_id, r_return, py_return, diff, do_log=True):
    """Insert one difference document, tolerating duplicates.

    When the collection is reported as not existing yet, a unique index over
    ("r_id", "py_id", "r_return", "py_return") and one index per distance
    field are requested first. A duplicate-key failure on insert is
    tolerated and optionally logged; any other exception propagates.

    Args:
        r_id: Value stored under "r_id".
        py_id: Value stored under "py_id".
        r_return: Value stored under "r_return".
        py_return: Value stored under "py_return".
        diff: Iterable of objects exposing ``to_dict()``; falsy entries are
            stored as ``None``.
        do_log: When true, log the duplicate-key failure.
    """
    collection = mongo_driver.get_collection(self.dataset,
                                             DIFFERENCES_COLLECTIONS)
    if not mongo_driver.is_collection_exists(collection):
        mongo_driver.create_unique_index_for_collection(
            collection, "r_id", "py_id", "r_return", "py_return")
        for distance_field in ("d_levenshtein", "d_jaro", "d_jaro_winkler",
                               "d_n_gram", "d_ast"):
            mongo_driver.create_index_for_collection(collection,
                                                     distance_field)
    # Built outside the try block so only the insert is guarded.
    document = {
        "r_id": r_id,
        "py_id": py_id,
        "r_return": r_return,
        "py_return": py_return,
        "diff": [d.to_dict() if d else None for d in diff],
    }
    try:
        collection.insert(document)
    except pymongo.errors.DuplicateKeyError as e:
        if do_log:
            # Log the exception itself: ``e.message`` does not exist on
            # Python 3 and would raise AttributeError here.
            LOGGER.warning("%s", e)
            LOGGER.info("We continue ... ")
def _test():
    """Print whether the "differences" collection of "Misconceptions" exists."""
    differences = mongo_driver.get_collection("Misconceptions", "differences")
    exists = mongo_driver.is_collection_exists(differences)
    print(exists)
def save_failed_py_function(self, function_json):
    """Insert ``function_json`` into the failed-functions collection.

    The target is "test_py_functions_failed" when ``self.is_test`` is
    truthy, otherwise "py_functions_failed". Exceptions raised by the insert
    propagate to the caller.

    Args:
        function_json: Document to insert.
    """
    if self.is_test:
        collection_name = "test_py_functions_failed"
    else:
        collection_name = "py_functions_failed"
    failed = mongo_driver.get_collection(self.dataset, collection_name)
    first_use = not mongo_driver.is_collection_exists(failed)
    if first_use:
        mongo_driver.create_index_for_collection(failed, "name")
    failed.insert(function_json)
def update_function_arg_type(self, function_name, function_arg_types):
    """Insert the argument types of a function into "py_functions_arg_types".

    Despite the name, this block only inserts a new document on every call;
    it does not look up or modify an existing one.

    Args:
        function_name: Value stored under "name".
        function_arg_types: Value stored under "types".
    """
    arg_types = mongo_driver.get_collection(self.dataset,
                                            "py_functions_arg_types")
    first_use = not mongo_driver.is_collection_exists(arg_types)
    if first_use:
        mongo_driver.create_index_for_collection(arg_types, "name")
    entry = {"name": function_name, "types": function_arg_types}
    arg_types.insert(entry)
def save_meta(self, bson_dict):
    """Insert ``bson_dict`` into the "py_file_meta" collection.

    When the collection is reported as not existing yet, an index on
    "file_path" is requested first.

    Args:
        bson_dict: Document to insert.
    """
    file_meta = mongo_driver.get_collection(self.dataset, "py_file_meta")
    first_use = not mongo_driver.is_collection_exists(file_meta)
    if first_use:
        mongo_driver.create_index_for_collection(file_meta, "file_path")
    file_meta.insert(bson_dict)