예제 #1
0
 def delete_differences(self, r_id=None, py_id=None):
   """Delete the difference documents matching ``r_id`` and/or ``py_id``.

   Only truthy ids become part of the filter. When neither id is supplied
   the call is refused instead of running with an empty filter.

   :param r_id: optional value matched against the ``r_id`` field.
   :param py_id: optional value matched against the ``py_id`` field.
   :raises Exception: if both ids are falsy.
   """
   candidates = (("r_id", r_id), ("py_id", py_id))
   query = {field: value for field, value in candidates if value}
   if len(query) == 0:
     raise Exception("Empty query. So use drop!")
   LOGGER.info("Deleting differences for query %s .... " % query)
   differences = mongo_driver.get_collection(self.dataset, DIFFERENCES_COLLECTIONS)
   differences.delete_many(query)
예제 #2
0
 def load_valid_snippets(self, language=None, use_normalized=False):
   """Return the stored statements that have a truthy ``variables`` field.

   :param language: when truthy, only statements of this language are read.
   :param use_normalized: read from the normalized statement collection
     instead of the regular one.
   :return: list of statement documents, fetched without ``outputs``.
   """
   if use_normalized:
     collection_name = STMT_NORMALIZED_COLLECTION
   else:
     collection_name = STMT_COLLECTION
   criteria = {"language": language} if language else {}
   # The projection excludes "outputs" from every returned document.
   cursor = mongo_driver.get_collection(self.dataset, collection_name).find(criteria, {"outputs": False})
   return [stmt for stmt in cursor if stmt.get('variables', None)]
예제 #3
0
 def load_stmts(self, language=None, is_valid=True, has_output=False, limit=None, use_normalized=False):
   """Load statements keyed by ``(snippet, language)``.

   :param language: when truthy, only statements of this language are read.
   :param is_valid: keep only statements with a truthy ``variables`` field.
   :param has_output: keep only statements with a truthy ``outputs`` field.
   :param limit: when truthy, stop once this many entries were collected.
   :param use_normalized: read from the normalized statement collection.
   :return: dict mapping ``(snippet, language)`` to the statement document.
   """
   if use_normalized:
     collection_name = STMT_NORMALIZED_COLLECTION
   else:
     collection_name = STMT_COLLECTION
   criteria = {"language": language} if language else None
   cursor = mongo_driver.get_collection(self.dataset, collection_name).find(criteria)
   formatted = {}
   for stmt in cursor:
     if is_valid and not stmt.get('variables', None):
       continue
     if has_output and not stmt.get('outputs', None):
       continue
     formatted[(stmt['snippet'], stmt['language'])] = stmt
     # The size can only reach the limit right after an insertion.
     if limit and len(formatted) == limit:
       break
   return formatted
예제 #4
0
 def save_inputs(self, inps):
   """Store every input as one document in the inputs collection.

   Each element of ``inps`` is iterated and every item in it is converted
   with ``to_dict(orient='records')``; the resulting list is stored under
   the ``"args"`` key of a new document.

   :param inps: iterable of iterables whose items expose
     ``to_dict(orient='records')`` (presumably pandas DataFrames - confirm
     against the caller).
   """
   collection = mongo_driver.get_collection(self.dataset, INPUTS_COLLECTIONS)
   for inp in inps:
     arg_set = [arg.to_dict(orient='records') for arg in inp]
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one({"args": arg_set})
예제 #5
0
 def get_executed_functions(self, language):
     """Return the ``names`` stored for ``language``.

     :param language: value matched against the ``language`` field.
     :return: the document's ``names`` entry, or ``None`` when no document
         matches.
     """
     document = mongo_driver.get_collection(
         self.dataset, "language_executed_functions"
     ).find_one({"language": language})
     return None if document is None else document['names']
예제 #6
0
 def store_normalized_stmt(self, stmt_dict):
   """Insert ``stmt_dict`` into the normalized statement collection.

   A unique index on ``(snippet, language)`` is created when
   ``mongo_driver.is_collection_exists`` reports the collection as missing.
   Duplicate-key errors raised by the insert are silently ignored.
   """
   normalized = mongo_driver.get_collection(self.dataset, STMT_NORMALIZED_COLLECTION)
   collection_missing = not mongo_driver.is_collection_exists(normalized)
   if collection_missing:
     mongo_driver.create_unique_index_for_collection(normalized, "snippet", "language")
   try:
     normalized.insert(stmt_dict, continue_on_error=True)
   except pymongo.errors.DuplicateKeyError:
     # An already stored statement is not an error here.
     return
예제 #7
0
 def save_language_executed_function_names(self, language, names):
     """Replace the stored executed-function names for ``language``.

     An index on ``language`` is created when
     ``mongo_driver.is_collection_exists`` reports the collection as
     missing. Any document already stored for ``language`` is deleted
     through the ``mongo_driver`` helpers before the new one is inserted.

     :param language: value stored under (and looked up by) ``"language"``.
     :param names: value stored under ``"names"``.
     """
     collection = mongo_driver.get_collection(
         self.dataset, "language_executed_functions")
     if not mongo_driver.is_collection_exists(collection):
         mongo_driver.create_index_for_collection(collection, "language")
     if mongo_driver.contains_document(collection, "language", language):
         mongo_driver.delete_document(collection, "language", language)
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one({"language": language, "names": names})
예제 #8
0
 def load_meta(self, file_name):
     """Find the ``py_file_meta`` document whose ``file_path`` contains the path.

     When ``file_name`` has more than three path separators, only the part
     starting at the third separator is used for the lookup; otherwise the
     whole ``file_name`` is used.

     :param file_name: file path to look up.
     :return: the first matching document, or ``None`` when nothing matches.
     """
     # ``os.sep`` must be escaped: on Windows it is a backslash, which is
     # not a valid regular expression on its own.
     sep_positions = [
         m.start() for m in re.finditer(re.escape(os.sep), file_name)
     ]
     if len(sep_positions) > 3:
         path_fragment = file_name[sep_positions[2]:]
     else:
         path_fragment = file_name
     collection = mongo_driver.get_collection(self.dataset, "py_file_meta")
     # Escape the fragment so characters such as "." or "(" in a path are
     # matched literally instead of being read as regex syntax.
     return collection.find_one(
         {"file_path": {"$regex": re.escape(path_fragment)}})
예제 #9
0
 def save_py_metadata(self, func_json):
     """Replace the stored metadata document for ``func_json["name"]``.

     An index on ``name`` is created when
     ``mongo_driver.is_collection_exists`` reports the collection as
     missing. Any document already stored under the same name is deleted
     through the ``mongo_driver`` helpers before ``func_json`` is inserted.

     :param func_json: mapping with at least a ``"name"`` key.
     """
     collection = mongo_driver.get_collection(self.dataset,
                                              "py_functions_metadata")
     if not mongo_driver.is_collection_exists(collection):
         mongo_driver.create_index_for_collection(collection, "name")
     name = func_json["name"]
     if mongo_driver.contains_document(collection, "name", name):
         mongo_driver.delete_document(collection, "name", name)
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one(func_json)
예제 #10
0
 def store_file_stmts(self, file_name, snippets, language):
   """Insert one document holding the snippets extracted from a file.

   A unique index on ``file_name`` is created when
   ``mongo_driver.is_collection_exists`` reports the collection as missing.

   :param file_name: stored under ``"file_name"``.
   :param snippets: stored under ``"snippets"``.
   :param language: stored under ``"language"``.
   """
   collection = mongo_driver.get_collection(self.dataset, FILE_STMT_COLLECTION)
   if not mongo_driver.is_collection_exists(collection):
     mongo_driver.create_unique_index_for_collection(collection, "file_name")
   # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
   # 4.0; ``insert_one`` is the supported single-document call.
   collection.insert_one({
     "file_name": file_name,
     "snippets": snippets,
     "language": language
   })
예제 #11
0
 def store_stmt(self, snippet, language, variables):
   """Insert one statement document into the statement collection.

   A unique index on ``(snippet, language)`` is created when
   ``mongo_driver.is_collection_exists`` reports the collection as missing.

   :param snippet: stored under ``"snippet"``.
   :param language: stored under ``"language"``.
   :param variables: stored under ``"variables"``.
   """
   collection = mongo_driver.get_collection(self.dataset, STMT_COLLECTION)
   if not mongo_driver.is_collection_exists(collection):
     mongo_driver.create_unique_index_for_collection(collection, "snippet", "language")
   # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
   # 4.0; ``insert_one`` is the supported single-document call.
   collection.insert_one({
     "snippet": snippet,
     "language": language,
     "variables": variables
   })
예제 #12
0
 def load_differences(self, r_id=None, py_id=None, additional_queries=None, projection=None, limit=0):
   """Query the differences collection and return the resulting cursor.

   :param r_id: when truthy, matched against the ``r_id`` field.
   :param py_id: when truthy, matched against the ``py_id`` field.
   :param additional_queries: optional mapping merged into the filter; its
     keys override ``r_id`` / ``py_id`` on collision.
   :param projection: passed through to ``find``.
   :param limit: maximum number of documents; falsy values become ``0``.
   :return: the cursor produced by ``find(...).limit(...)``.
   """
   candidates = (("r_id", r_id), ("py_id", py_id))
   query = {field: value for field, value in candidates if value}
   if additional_queries:
     query.update(additional_queries)
   collection = mongo_driver.get_collection(self.dataset, DIFFERENCES_COLLECTIONS)
   cursor = collection.find(query, projection)
   return cursor.limit(limit or 0)
예제 #13
0
 def load_function_arg_type(self, function_name):
     """Return the ``py_functions_arg_types`` document for ``function_name``.

     :param function_name: value matched against the ``name`` field.
     :return: the matching document, or ``None`` when nothing matches or
         the lookup raised.
     """
     try:
         return mongo_driver.get_collection(
             self.dataset,
             "py_functions_arg_types").find_one({"name": function_name})
     except Exception as e:
         # Format the exception itself: ``e.message`` does not exist on
         # Python 3 and would raise AttributeError inside this handler,
         # defeating the "return None" fallback.
         LOGGER.critical(
             "Failed to load args for function: '%s'. Returning None."
             "\nMessage: %s" % (function_name, e))
         return None
예제 #14
0
 def save_cloned_function_names(self, name, clones):
     """Replace the stored clone document for the function ``name``.

     An index on ``_function_name_`` is created when
     ``mongo_driver.is_collection_exists`` reports the collection as
     missing. Any document already stored for ``name`` is deleted through
     the ``mongo_driver`` helpers before ``clones`` is inserted.

     :param name: function name, stored under ``"_function_name_"``.
     :param clones: mapping to store; it is modified in place (the
         ``"_function_name_"`` key is added to it).
     """
     collection = mongo_driver.get_collection(self.dataset,
                                              "cloned_functions")
     if not mongo_driver.is_collection_exists(collection):
         mongo_driver.create_index_for_collection(collection,
                                                  "_function_name_")
     if mongo_driver.contains_document(collection, "_function_name_", name):
         mongo_driver.delete_document(collection, "_function_name_", name)
     clones["_function_name_"] = name
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one(clones)
예제 #15
0
 def load_args(self, args_key):
     """Return the fuzzed-args document stored under ``args_key``.

     Reads from ``test_fuzzed_args`` when ``self.is_test`` is truthy and
     from ``fuzzed_args`` otherwise.

     :param args_key: value matched against the ``key`` field.
     :return: the matching document, or ``None`` when nothing matches or
         the lookup raised (the failure is logged with its traceback).
     """
     if self.is_test:
         collection_name = "test_fuzzed_args"
     else:
         collection_name = "fuzzed_args"
     collection = mongo_driver.get_collection(self.dataset, collection_name)
     try:
         document = collection.find_one({"key": args_key})
     except Exception:
         LOGGER.exception(
             "Failed to load args with key: '%s'. Returning None" %
             args_key)
         return None
     return document
예제 #16
0
 def load_py_metadata(self, function_name):
     """Return the ``py_functions_metadata`` document for ``function_name``.

     :param function_name: value matched against the ``name`` field.
     :return: the matching document, or ``None`` when nothing matches or
         the lookup raised (the failure is logged with its traceback).
     """
     try:
         return mongo_driver.get_collection(
             self.dataset, "py_functions_metadata"
         ).find_one({"name": function_name})
     except Exception:
         LOGGER.exception(
             "Failed to metadata for function: '%s'. Returning None" %
             function_name)
     return None
예제 #17
0
 def save_py_function(self, function_json):
     """Insert ``function_json`` into the executed-functions collection.

     The collection is ``test_py_functions_executed`` when ``self.is_test``
     is truthy and ``py_functions_executed`` otherwise. An index on
     ``name`` is created when ``mongo_driver.is_collection_exists`` reports
     the collection as missing. If the insert fails for any reason, the
     ``outputs`` entry is dropped and the document is handed to
     ``self.save_failed_py_function`` instead.

     :param function_json: mapping to store; modified in place on failure.
     """
     collection_name = "test_py_functions_executed" if self.is_test else "py_functions_executed"
     collection = mongo_driver.get_collection(self.dataset, collection_name)
     if not mongo_driver.is_collection_exists(collection):
         mongo_driver.create_index_for_collection(collection, "name")
     try:
         # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed
         # in 4.0; ``insert_one`` is the supported single-document call.
         collection.insert_one(function_json)
     except Exception:
         # ``pop`` with a default: a document without "outputs" must not
         # turn the fallback itself into a KeyError.
         function_json.pop('outputs', None)
         self.save_failed_py_function(function_json)
예제 #18
0
 def load_inputs(self, column_names):
   """Load every stored input as a list of DataFrames.

   Each document in the inputs collection yields one list; every entry of
   its ``"args"`` field is turned into a ``pandas.DataFrame`` whose columns
   are reindexed to ``column_names``.

   :param column_names: column labels passed to ``reindex(..., axis=1)``.
   :return: list (one item per document) of lists of DataFrames.
   """
   collection = mongo_driver.get_collection(self.dataset, INPUTS_COLLECTIONS)
   return [
     [pd.DataFrame(arg).reindex(column_names, axis=1) for arg in inp["args"]]
     for inp in collection.find()
   ]
예제 #19
0
 def load_self_syntactic_differences(self, language=None, id_1=None, id_2=None,
                                     additional_queries=None, projection=None, limit=0):
   """Query the self-syntactic-differences collection and return the cursor.

   :param language: when truthy, matched against the ``language`` field.
   :param id_1: when truthy, matched against the ``id_1`` field.
   :param id_2: when truthy, matched against the ``id_2`` field.
   :param additional_queries: optional mapping merged into the filter; its
     keys override the fields above on collision.
   :param projection: passed through to ``find``.
   :param limit: maximum number of documents; falsy values become ``0``.
   :return: the cursor produced by ``find(...).limit(...)``.
   """
   candidates = (("id_1", id_1), ("id_2", id_2), ("language", language))
   query = {field: value for field, value in candidates if value}
   if additional_queries:
     query.update(additional_queries)
   collection = mongo_driver.get_collection(self.dataset, SELF_SYNTACTIC_DIFFERENCES_COLLECTION)
   cursor = collection.find(query, projection)
   return cursor.limit(limit or 0)
예제 #20
0
 def load_inputs(self, args_key):
     """Load the fuzzed arguments stored under ``args_key``.

     The document's ``"args"`` list must contain exactly
     ``properties.FUZZ_ARGUMENT_SIZE`` entries (checked with ``assert``).
     When ``self.is_array`` accepts the list it is returned unchanged;
     otherwise one copy of the list is returned per position of its first
     entry.

     :param args_key: value matched against the ``key`` field.
     :return: the arguments list, or a list of copies of it.
     """
     fuzzed = mongo_driver.get_collection(self.dataset, "fuzzed_args")
     arguments = fuzzed.find_one({"key": args_key})["args"]
     assert len(arguments) == properties.FUZZ_ARGUMENT_SIZE
     if self.is_array(arguments):
         return arguments
     # NOTE(review): every inner list is a full copy of ``arguments``; this
     # looks like it was meant to be a transpose (``arg[i]``) - confirm
     # against the callers before changing it.
     return [list(arguments) for _ in range(len(arguments[0]))]
예제 #21
0
 def create_stmt_file_map(self, stmt, stmt_file_map, do_log=True):
   """Insert a document combining ``stmt`` with the entries of ``stmt_file_map``.

   An index on ``snippet`` is created when
   ``mongo_driver.is_collection_exists`` reports the collection as missing.
   Duplicate-key errors are swallowed, and logged when ``do_log`` is true.

   :param stmt: stored under ``"snippet"``.
   :param stmt_file_map: mapping merged into the document; a ``"snippet"``
     key in it overrides ``stmt``.
   :param do_log: whether a duplicate-key error is logged.
   """
   collection = mongo_driver.get_collection(self.dataset, STMT_FILE_COLLECTION)
   if not mongo_driver.is_collection_exists(collection):
     mongo_driver.create_index_for_collection(collection, "snippet")
   doc = {"snippet": stmt}
   doc.update(stmt_file_map)
   try:
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one(doc)
   except pymongo.errors.DuplicateKeyError as e:
     if do_log:
       # Log the exception itself: ``e.message`` does not exist on
       # Python 3 and would raise AttributeError inside this handler.
       LOGGER.warning("%s", e)
       LOGGER.info("We continue ... ")
예제 #22
0
 def update_stmt_outputs(self, stmt_id, outputs):
   """Store ``outputs`` on the statement whose ``_id`` is ``stmt_id``.

   Only the ``outputs`` field is written, so other fields of the stored
   document are never overwritten with stale values. If that write raises,
   ``outputs`` is set to ``None`` instead; a failure of this fallback
   propagates to the caller.

   :param stmt_id: ``_id`` of the statement to update.
   :param outputs: value to store under ``"outputs"``.
   :raises LookupError: if no statement with ``stmt_id`` exists.
   """
   collection = mongo_driver.get_collection(self.dataset, STMT_COLLECTION)
   selector = {'_id': stmt_id}
   try:
     result = collection.update_one(selector, {"$set": {"outputs": outputs}}, upsert=False)
   except Exception:
     # Fall back to clearing the field when the outputs cannot be stored.
     result = collection.update_one(selector, {"$set": {"outputs": None}}, upsert=False)
   if result.matched_count == 0:
     raise LookupError("No statement found with _id %r" % (stmt_id,))
예제 #23
0
 def save_clusters(self, clusters, suffix):
     """Store each cluster as one document in ``clusters_<suffix>``.

     A unique index on ``cluster_id`` is created when
     ``mongo_driver.is_collection_exists`` reports the collection as
     missing.

     :param clusters: mapping of cluster id to an iterable of functions;
         the id is logged with ``%d`` and each function is serialised with
         ``lib.to_json``.
     :param suffix: appended to ``"clusters_"`` to form the collection name.
     """
     collection_name = "clusters_%s" % suffix
     collection = mongo_driver.get_collection(self.dataset, collection_name)
     if not mongo_driver.is_collection_exists(collection):
         mongo_driver.create_unique_index_for_collection(
             collection, "cluster_id")
     for cluster_id, functions in clusters.items():
         LOGGER.info("Saving cluster: '%d', with %d functions" %
                     (cluster_id, len(functions)))
         # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed
         # in 4.0; ``insert_one`` is the supported single-document call.
         collection.insert_one({
             "cluster_id": cluster_id,
             "functions": [lib.to_json(f) for f in functions]
         })
예제 #24
0
 def save_self_syntactic_differences(self, records, do_log=True):
   """Bulk-insert ``records`` into the self-syntactic-differences collection.

   When ``mongo_driver.is_collection_exists`` reports the collection as
   missing, a unique index on ``(id_1, id_2, language)`` and one index per
   distance field are created first. Duplicate-key failures are swallowed,
   and logged when ``do_log`` is true; any other write failure propagates.

   NOTE(review): the insert stays ordered, so records following the first
   duplicate are not written - confirm whether ``ordered=False`` is wanted.

   :param records: documents passed to ``insert_many``.
   :param do_log: whether a duplicate-key failure is logged.
   """
   collection = mongo_driver.get_collection(self.dataset, SELF_SYNTACTIC_DIFFERENCES_COLLECTION)
   if not mongo_driver.is_collection_exists(collection):
     mongo_driver.create_unique_index_for_collection(collection, "id_1", "id_2", "language")
     for distance_field in ("d_levenshtein", "d_jaro", "d_jaro_winkler", "d_n_gram", "d_ast"):
       mongo_driver.create_index_for_collection(collection, distance_field)
   try:
     collection.insert_many(records)
   except (pymongo.errors.DuplicateKeyError, pymongo.errors.BulkWriteError) as e:
     # ``insert_many`` reports duplicates through BulkWriteError, not
     # DuplicateKeyError, so the bulk error has to be inspected.
     if isinstance(e, pymongo.errors.BulkWriteError):
       details = e.details or {}
       write_errors = details.get("writeErrors") or []
       only_duplicates = (
         bool(write_errors)
         and not details.get("writeConcernErrors")
         # 11000 is the server's duplicate-key error code.
         and all(err.get("code") == 11000 for err in write_errors)
       )
       if not only_duplicates:
         raise
     if do_log:
       # Log the exception itself: ``e.message`` does not exist on
       # Python 3 and would raise AttributeError inside this handler.
       LOGGER.warning("%s", e)
       LOGGER.info("We continue ... ")
예제 #25
0
 def load_difference(self, r_id, py_id, limit=0):
   """Load difference documents with their ``diff`` entries deserialised.

   :param r_id: when truthy, matched against the ``r_id`` field.
   :param py_id: when truthy, matched against the ``py_id`` field.
   :param limit: passed to the cursor's ``limit``.
   :return: list of documents whose ``diff`` entries are rebuilt with
     ``differences.DiffMeta.from_dict`` (falsy entries become ``None``).
   """
   candidates = (("r_id", r_id), ("py_id", py_id))
   query = {field: value for field, value in candidates if value}
   collection = mongo_driver.get_collection(self.dataset, DIFFERENCES_COLLECTIONS)
   docs = []
   for doc in collection.find(query).limit(limit):
     doc['diff'] = [
       differences.DiffMeta.from_dict(d) if d else None
       for d in doc["diff"]
     ]
     # These self-assignments leave the values untouched but still raise
     # KeyError when either field is missing from the document.
     doc["r_return"] = doc["r_return"]
     doc["py_return"] = doc["py_return"]
     docs.append(doc)
   return docs
예제 #26
0
 def save_difference(self, r_id, py_id, r_return, py_return, diff, do_log=True):
   """Insert one difference document.

   When ``mongo_driver.is_collection_exists`` reports the collection as
   missing, a unique index on ``(r_id, py_id, r_return, py_return)`` and
   one index per distance field are created first. Duplicate-key errors
   are swallowed, and logged when ``do_log`` is true.

   :param r_id: stored under ``"r_id"``.
   :param py_id: stored under ``"py_id"``.
   :param r_return: stored under ``"r_return"``.
   :param py_return: stored under ``"py_return"``.
   :param diff: iterable whose truthy items expose ``to_dict()``; falsy
     items are stored as ``None``.
   :param do_log: whether a duplicate-key error is logged.
   """
   collection = mongo_driver.get_collection(self.dataset, DIFFERENCES_COLLECTIONS)
   if not mongo_driver.is_collection_exists(collection):
     mongo_driver.create_unique_index_for_collection(collection, "r_id", "py_id", "r_return", "py_return")
     for distance_field in ("d_levenshtein", "d_jaro", "d_jaro_winkler", "d_n_gram", "d_ast"):
       mongo_driver.create_index_for_collection(collection, distance_field)
   try:
     # ``Collection.insert`` is deprecated since PyMongo 3.0 and removed in
     # 4.0; ``insert_one`` is the supported single-document call.
     collection.insert_one({
       "r_id": r_id,
       "py_id": py_id,
       "r_return": r_return,
       "py_return": py_return,
       "diff": [d.to_dict() if d else None for d in diff]
     })
   except pymongo.errors.DuplicateKeyError as e:
     if do_log:
       # Log the exception itself: ``e.message`` does not exist on
       # Python 3 and would raise AttributeError inside this handler.
       LOGGER.warning("%s", e)
       LOGGER.info("We continue ... ")
예제 #27
0
 def delete_file_stmts(self, language=None):
   """Delete file-statement documents.

   :param language: when truthy, only documents of this language are
     deleted; otherwise the whole collection is dropped.
   """
   collection = mongo_driver.get_collection(self.dataset, FILE_STMT_COLLECTION)
   if not language:
     collection.drop()
     return
   collection.delete_many({"language": language})
예제 #28
0
 def load_file_stmts(self, language=None):
   """Return a cursor over the file-statement documents.

   :param language: when truthy, only documents of this language are
     selected; otherwise every document is.
   """
   collection = mongo_driver.get_collection(self.dataset, FILE_STMT_COLLECTION)
   if language:
     return collection.find({"language": language})
   return collection.find()
예제 #29
0
 def load_stmts_for_file_name(self, file_name):
   """Return the file-statement document stored for ``file_name``.

   :param file_name: value matched against the ``file_name`` field.
   :return: the matching document, or ``None`` when nothing matches or the
     lookup raised (the failure is logged).
   """
   try:
     collection = mongo_driver.get_collection(self.dataset, FILE_STMT_COLLECTION)
     document = collection.find_one({"file_name": file_name})
   except Exception:
     LOGGER.critical("Failed to load file name : %s" % file_name)
     document = None
   return document
예제 #30
0
def _test():
  """Print whether the ``differences`` collection of ``Misconceptions`` exists."""
  differences_collection = mongo_driver.get_collection("Misconceptions", "differences")
  exists = mongo_driver.is_collection_exists(differences_collection)
  print(exists)