# NOTE(review): the counter updates below look like the tail of an upsert
# loop whose header sits above this fragment -- confirm their nesting against
# the full file before applying.
docs_match_update += upd_res.matched_count
docs_modified_update += upd_res.modified_count
if upd_res.upserted_id is not None:
    docs_inserted += 1

# Summary of the upsert counters, followed by a blank separator line.
print("Documents Inserted: ", docs_inserted)
print("Documents Matched Update Filter: ", docs_match_update)
print("Documents Modified: ", docs_modified_update)
print()

# --- Set path of data exportation ---
# to run on server
server_path = '/var/www/html/saint/indicators2018/botnets/'
# to run locally
local_path = ""

# --- Descriptive analysis and result exportation ---
botscout = db.threats.botscout
series_name = "perdayTimeSeriesBotnetsDetailed"

analysis = DescriptiveAnalysis(collection=botscout, path=server_path)
analysis(query=None, projection=None)
# returns a pandas data frame
data_frame = analysis.time_series_analysis('mongoDate')
# store analysis results as both CSV and JSON under the same name
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# Top 10 countries among documents whose mongoDate is within the last 7 days.
last_week = datetime.utcnow() - timedelta(days=7)
recent_filter = {'mongoDate': {"$gte": last_week}}
analysis_2 = DescriptiveAnalysis(collection=botscout, path=server_path)
analysis_2(query=recent_filter, projection={"_id": 0})
top_countries = analysis_2.top_n(10, "Country", 'botnets-detailed-top-countries')
print(top_countries)

# --- Export current MongoDB collection instance ---
# NOTE(review): the counter updates below look like the tail of an upsert
# loop whose header sits above this fragment -- confirm their nesting against
# the full file before applying.
docs_match_update += upd_res.matched_count
docs_modified_update += upd_res.modified_count
if upd_res.upserted_id is not None:
    docs_inserted += 1

# Summary of the upsert counters, followed by a blank separator line.
print("Documents Inserted: ", docs_inserted)
print("Documents Matched Update Filter: ", docs_match_update)
print("Documents Modified: ", docs_modified_update)
print()

# --- Set path of data exportation ---
# to run on server
server_path = '/var/www/html/saint/indicators2018/botnets/'
# to run locally
local_path = ""

# --- Descriptive analysis and result exportation ---
ip_master_list = db.threats.ipmasterlist
series_name = "perdayTimeSeriesBotnets"

analysis = DescriptiveAnalysis(collection=ip_master_list, path=server_path)
analysis(query={}, projection={"_id": 0})
# returns a pandas data frame
data_frame = analysis.time_series_analysis('mongoDate')
# store analysis results as both CSV and JSON under the same name
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# Top 10 IPs among documents whose mongoDate is within the last 7 days.
last_week = datetime.utcnow() - timedelta(days=7)
recent_filter = {'mongoDate': {"$gte": last_week}}
analysis_2 = DescriptiveAnalysis(collection=ip_master_list, path=server_path)
analysis_2(query=recent_filter, projection={"_id": 0})
analysis_2.top_n(10, "IP", "botnets-top-ips")

# --- Export current MongoDB collection instance ---
# ipmasterlist_collection = ExportCollectionData(collection=db.threats.ipmasterlist, path=server_path)
# ipmasterlist_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# crawl indicator
html_links = crawl_malware_domains(URL)
# scrape data from links
data = scrape_and_model_content(html_links)

# --- Store instance of scraped data in MongoDB ---
db = connect_to_mongodb()
if data:
    # Replace the stored instance only when there is something to store:
    # insert_many() rejects an empty document list, so dropping first would
    # leave the collection empty and abort the run before any export.
    db.threats.malwaredomains.drop()
    res = db.threats.malwaredomains.insert_many(data)
    print("Documents Inserted: ", len(res.inserted_ids))
else:
    print("Documents Inserted: ", 0)
    print("No documents scraped; previous collection instance kept")

# --- Set path of data exportation ---
# to run on server
server_path = '/var/www/html/saint/indicators2018/malware/'
# to run locally
local_path = ""

# --- Descriptive analysis and result exportation ---
analysis = DescriptiveAnalysis(collection=db.threats.malwaredomains, path=server_path)
analysis(query={}, projection={'_id': 0})
# returns a pandas data frame
# NOTE(review): the series is computed per month but exported under a
# "perday..." name -- the name is kept as-is; confirm it is intentional.
data_frame = analysis.time_series_analysis_per_month('mongoDate')
# store analysis results
analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesMalware")
analysis.data_frame_to_json(data_frame, "perdayTimeSeriesMalware")

top5 = analysis.top_n(5, "Subcategory", "malware-top-subcategories")
print(top5)

# --- Export current MongoDB collection instance ---
# malwaredomains_collection = ExportCollectionData(collection=db.threats.malwaredomains, path=server_path)
# malwaredomains_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# malwaredomains_collection.export_collection_to_json("dataset-malware")
#
# csv_header = ["Category", "Entity-Type", "Domain", "Subcategory", "Submitted-By", "TimestampUTC", "DatetimeUTC", "TimestampUTC-CTI", "DatetimeUTC-CTI", "State"]
# malwaredomains_collection.export_collection_to_csv("dataset-malware", csv_header)
# Upsert every document from the cursor into the phishing repository,
# matching on its _id, and keep running totals of what happened.
phishing_repo = db.threats.phishtank_repository
for doc in cursor:
    try:
        upd_res = phishing_repo.update_one(
            {"_id": doc["_id"]}, {"$set": doc}, upsert=True)
    except OperationFailure as err:
        # A failed write is reported and the loop moves on to the next doc.
        print("Error in mongoDB operation > ", err)
    else:
        docs_match_update += upd_res.matched_count
        docs_modified_update += upd_res.modified_count
        if upd_res.upserted_id is not None:
            docs_inserted += 1

print("Documents Inserted: ", docs_inserted)
print("Documents Matched Update Filter: ", docs_match_update)
print("Documents Modified: ", docs_modified_update)

# --- Descriptive analysis for phishing repository ---
analysis = DescriptiveAnalysis(collection=phishing_repo, path=server_path)
analysis(query={}, projection={"_id": 0})

# Time series analysis, stored as both CSV and JSON under the same name
series_name = "perdayTimeSeriesPhishing"
data_frame = analysis.time_series_analysis('mongoDate')
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# Top author categorical analysis over documents from the last 7 days
last_week = datetime.utcnow() - timedelta(days=7)
recent_filter = {'mongoDate': {"$gte": last_week}}
analysis_2 = DescriptiveAnalysis(collection=phishing_repo, path=server_path)
analysis_2(query=recent_filter, projection={"_id": 0})
top5 = analysis_2.top_n(5, "Submitted-by", "phishing-top-submitters")
print(top5)

# --- Export current MongoDB collection instance ---
# exploitDataBase = ExportCollectionData(collection=db.threats.phishtank_repository, path=server_path)
# exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# start, end = today_datetime()
# # delete today's data
# print(start, end)
# res_d = db.threats.ransomware.delete_many({"mongoDate-CTI": {"$gte": start, "$lt": end}})
# print("Documents Deleted: ", res_d.deleted_count)

if total_list_data:
    # Replace the stored instance only when there is something to store:
    # insert_many() rejects an empty document list, so dropping first would
    # leave the collection empty and abort the run before any export.
    db.threats.ransomware.drop()
    # import recent instance
    res = db.threats.ransomware.insert_many(total_list_data)
    print("Documents Inserted: ", len(res.inserted_ids))
else:
    print("Documents Inserted: ", 0)
    print("No documents collected; previous collection instance kept")

# --- Set path of data exportation ---
# to run on server
server_path = '/var/www/html/saint/indicators2018/ransomware/'
# to run locally
local_path = ""

# --- Descriptive analysis and result exportation ---
analysis = DescriptiveAnalysis(collection=db.threats.ransomware, path=server_path)
# call object to query collection. No arguments chooses default parameters {}
# returns num of doc results
analysis(query={}, projection={"_id": 0})

# returns a pandas data frame
# ransomware_data_frame = analysis.time_series_analysis('mongoDate-CTI')
# store analysis results
# analysis.data_frame_to_csv(ransomware_data_frame, "perdayTimeSeriesRansomware")
# analysis.data_frame_to_json(ransomware_data_frame, "perdayTimeSeriesRansomware")

top3_entity_types = analysis.top_n(3, "Entity-Type", 'ransomware-top-entity-types')
top5_scope = analysis.top_n(5, "Scope", 'ransomware-top-scope')
top5_subcategories = analysis.top_n(5, "Subcategory", "ransomware-top-subcategories")

print("Top Entity Type: ", top3_entity_types)
print("Top Scope: ", top5_scope)
if json_data:
    # Swap today's documents only when a fresh batch exists: insert_many()
    # rejects an empty document list, so deleting first would discard
    # today's stored data and abort the run before any export.
    # delete today's data
    db.threats.webBasedAttacks1.delete_many(
        {"mongoDate-CTI": {"$gte": start, "$lt": end}})
    # store most recent today's data
    res = db.threats.webBasedAttacks1.insert_many(json_data)
    print("Documents Inserted: ", len(res.inserted_ids))
else:
    print("Documents Inserted: ", 0)
    print("No documents collected; today's stored data kept")

# --- Set path of data exportation ---
# to run on server
server_path = '/var/www/html/saint/indicators2018/web-based-attacks/'
# to run locally
local_path = ""

# --- Descriptive analysis and result exportation ---
analysis = DescriptiveAnalysis(collection=db.threats.webBasedAttacks1, path=server_path)
analysis(query={}, projection={'_id': 0})
# returns a pandas data frame
data_frame = analysis.time_series_analysis('mongoDate-CTI')
# store analysis results
analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesWebBasedAttacks")
analysis.data_frame_to_json(data_frame, "perdayTimeSeriesWebBasedAttacks")

# top10 = analysis.top_n(10,"IP", "web-based-attacks-top-IPs")
# print(top10)

# --- Export current MongoDB collection instance ---
# webBasedAttacks_collection = ExportCollectionData(collection=db.threats.webBasedAttacks1, path=server_path)
# webBasedAttacks_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# webBasedAttacks_collection.export_collection_to_json("dataset-webBasedAttacks")
#
# csv_header = ["IP", "Category", "Entity-Type", "TimestampUTC-CTI", "DatetimeUTC-CTI"]
# webBasedAttacks_collection.export_collection_to_csv("dataset-webBasedAttacks", csv_header)
if __name__ == "__main__":
    # Header line for the run log: current UTC time, then a blank line.
    report_time = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    print("Report on", report_time, '\n')

    # parsing {0} to the string for iteration
    indicator = 'https://www.phishtank.com/phish_search.php?page={0}&active=y&verified=u'

    # --- Crawl Phishtank ---
    db = connect_to_mongodb()
    crawl_site(db, indicator)

    # --- Set path of data exportation ---
    # to run on server
    server_path = '/var/www/html/saint/indicators2018/phishing/'
    # to run locally
    local_path = ""

    # --- Descriptive analysis and result exportation ---
    series_name = "perdayTimeSeriesPhishingCurrentInstance"
    analysis = DescriptiveAnalysis(collection=db.threats.phishtank, path=server_path)
    analysis(query={}, projection={"_id": 0})
    # returns a pandas data frame
    data_frame = analysis.time_series_analysis('mongoDate')
    # store analysis results as both CSV and JSON under the same name
    analysis.data_frame_to_csv(data_frame, series_name)
    analysis.data_frame_to_json(data_frame, series_name)

    # --- Export current MongoDB collection instance ---
    # exploitDataBase = ExportCollectionData(collection=db.threats.phishtank, path=server_path)
    # exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
    # exploitDataBase.export_collection_to_json("dataset-phishing-current-instance")
    #
    # csv_header = ["URL", "Submitted-by", "Valid", "Online", "DatetimeUTC", "TimestampUTC",
    #               "DatetimeUTC-CTI", "TimestampUTC-CTI", "Entity-type", "Category"]