print("Documents Matched Update Filter: ", docs_match_update)
print("Documents Modified: ", docs_modified_update)
print()

# ---------------------------------------------------------------------------
# Set path of data exportation
# ---------------------------------------------------------------------------
# to run on server
server_path = '/var/www/html/saint/indicators2018/botnets/'
# to run locally
local_path = ""

# ---------------------------------------------------------------------------
# Descriptive analysis and result exportation
# ---------------------------------------------------------------------------
# First pass: empty query (no filter) with the `_id` field projected out.
analysis = DescriptiveAnalysis(
    collection=db.threats.ipmasterlist,
    path=server_path,
)
analysis(query={}, projection={"_id": 0})

# time_series_analysis returns a pandas data frame
data_frame = analysis.time_series_analysis('mongoDate')

# store analysis results: same base name for the CSV and the JSON export
series_name = "perdayTimeSeriesBotnets"
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# Second pass: only documents whose 'mongoDate' is within the last 7 days.
last_week = datetime.utcnow() - timedelta(days=7)
recent_only = {'mongoDate': {"$gte": last_week}}
analysis_2 = DescriptiveAnalysis(
    collection=db.threats.ipmasterlist,
    path=server_path,
)
analysis_2(query=recent_only, projection={"_id": 0})
analysis_2.top_n(10, "IP", "botnets-top-ips")

# ---------------------------------------------------------------------------
# Export current MongoDB collection instance (disabled; uncomment to run)
# ---------------------------------------------------------------------------
# ipmasterlist_collection = ExportCollectionData(collection=db.threats.ipmasterlist, path=server_path)
# ipmasterlist_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# ipmasterlist_collection.export_collection_to_json("dataset-botnets")
#
# csv_header = ["Category", "Entity-Type", "IP", "IP-User", "TimestampUTC", "DatetimeUTC",
#               "TimestampUTC-CTI", "DatetimeUTC-CTI", "Description"]
# ipmasterlist_collection.export_collection_to_csv("dataset-botnets", csv_header)
# scrape data from links
data = scrape_and_model_content(html_links)

# Guard against an empty scrape: the collection below is dropped *before* the
# insert, and PyMongo's insert_many() refuses an empty document list. Without
# this check a failed scrape would wipe the stored data and then crash.
# Abort here instead, so the previously stored snapshot stays intact.
if not data:
    raise SystemExit(
        "No malware domain records were scraped; "
        "existing threats.malwaredomains collection left untouched."
    )

# ---------------------------------------------------------------------------
# Store instance of scraped data in MongoDB (replaces the previous contents)
# ---------------------------------------------------------------------------
db = connect_to_mongodb()
db.threats.malwaredomains.drop()
res = db.threats.malwaredomains.insert_many(data)
print("Documents Inserted: ", len(res.inserted_ids))

# ---------------------------------------------------------------------------
# Set path of data exportation
# ---------------------------------------------------------------------------
# to run on server
server_path = '/var/www/html/saint/indicators2018/malware/'
# to run locally
local_path = ""

# ---------------------------------------------------------------------------
# Descriptive analysis and result exportation
# ---------------------------------------------------------------------------
# Empty query (no filter) with the `_id` field projected out.
analysis = DescriptiveAnalysis(
    collection=db.threats.malwaredomains,
    path=server_path,
)
analysis(query={}, projection={'_id': 0})

# returns a pandas data frame
# NOTE(review): this is the *per-month* variant, yet the results are exported
# under "perday..." file names. The names are kept unchanged because
# downstream consumers may depend on them -- confirm which is intended.
data_frame = analysis.time_series_analysis_per_month('mongoDate')

# store analysis results: same base name for the CSV and the JSON export
series_name = "perdayTimeSeriesMalware"
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# Top 5 values of the "Subcategory" field, echoed to stdout.
top5 = analysis.top_n(5, "Subcategory", "malware-top-subcategories")
print(top5)

# ---------------------------------------------------------------------------
# Export current MongoDB collection instance (disabled; uncomment to run)
# ---------------------------------------------------------------------------
# malwaredomains_collection = ExportCollectionData(collection=db.threats.malwaredomains, path=server_path)
# malwaredomains_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# malwaredomains_collection.export_collection_to_json("dataset-malware")
#
# csv_header = ["Category", "Entity-Type", "Domain", "Subcategory", "Submitted-By", "TimestampUTC",
#               "DatetimeUTC", "TimestampUTC-CTI", "DatetimeUTC-CTI", "State"]
# malwaredomains_collection.export_collection_to_csv("dataset-malware", csv_header)
{"mongoDate-CTI": { "$gte": start, "$lt": end }}) # store most recent today's data res = db.threats.webBasedAttacks1.insert_many(json_data) print("Documents Inserted: ", len(res.inserted_ids)) """ Set Path Of Data Exportation """ # to run on server server_path = '/var/www/html/saint/indicators2018/web-based-attacks/' # to run locally local_path = "" """ Descriptive Analysis and Result Exportation""" analysis = DescriptiveAnalysis(collection=db.threats.webBasedAttacks1, path=server_path) analysis(query={}, projection={'_id': 0}) # returns a pandas data frame data_frame = analysis.time_series_analysis('mongoDate-CTI') # store analysis results analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesWebBasedAttacks") analysis.data_frame_to_json(data_frame, "perdayTimeSeriesWebBasedAttacks") # top10 = analysis.top_n(10,"IP", "web-based-attacks-top-IPs") # print(top10) ''' Export current MongoDB collection instance ''' # webBasedAttacks_collection = ExportCollectionData(collection=db.threats.webBasedAttacks1, path=server_path) # webBasedAttacks_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0}) # webBasedAttacks_collection.export_collection_to_json("dataset-webBasedAttacks") # # csv_header = ["IP", "Category", "Entity-Type", "TimestampUTC-CTI", "DatetimeUTC-CTI"] # webBasedAttacks_collection.export_collection_to_csv("dataset-webBasedAttacks", csv_header)
docs_modified_update += upd_res.modified_count if upd_res.upserted_id is not None: docs_inserted += 1 except OperationFailure as e: print("Error in mongoDB operation > ", e) print("Documents Inserted: ", docs_inserted) print("Documents Matched Update Filter: ", docs_match_update) print("Documents Modified: ", docs_modified_update) """ Descriptive Analysis for Phishing Repository""" analysis = DescriptiveAnalysis(collection=db.threats.phishtank_repository, path=server_path) analysis(query={}, projection={"_id": 0}) # Time Series Anaalysis data_frame = analysis.time_series_analysis('mongoDate') analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesPhishing") analysis.data_frame_to_json(data_frame, "perdayTimeSeriesPhishing") # Top Author Categorical Analysis last_week = datetime.utcnow() - timedelta(days=7) analysis_2 = DescriptiveAnalysis( collection=db.threats.phishtank_repository, path=server_path) analysis_2(query={'mongoDate': {"$gte": last_week}}, projection={"_id": 0}) top5 = analysis_2.top_n(5, "Submitted-by", "phishing-top-submitters") print(top5) ''' Export Current MongoDB Collection Instance ''' # exploitDataBase = ExportCollectionData(collection=db.threats.phishtank_repository, path=server_path) # exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0}) # exploitDataBase.export_collection_to_json("dataset-phishing") # csv_header = ["URL", "Submitted-by", "Valid", "Online", "DatetimeUTC", "TimestampUTC", # "DatetimeUTC-CTI", "TimestampUTC-CTI", "Entity-type", "Category"] # exploitDataBase.export_collection_to_csv("dataset-phishing", csv_header)
# Search URL template: `{0}` stands in for the `page=` value so the string
# can be filled in per page during iteration.
indicator = 'https://www.phishtank.com/phish_search.php?page={0}&active=y&verified=u'

# ---------------------------------------------------------------------------
# Crawl Phishtank
# ---------------------------------------------------------------------------
db = connect_to_mongodb()
crawl_site(db, indicator)

# ---------------------------------------------------------------------------
# Set path of data exportation
# ---------------------------------------------------------------------------
# to run on server
server_path = '/var/www/html/saint/indicators2018/phishing/'
# to run locally
local_path = ""

# ---------------------------------------------------------------------------
# Descriptive analysis and result exportation
# ---------------------------------------------------------------------------
# Empty query (no filter) with the `_id` field projected out.
analysis = DescriptiveAnalysis(
    collection=db.threats.phishtank,
    path=server_path,
)
analysis(query={}, projection={"_id": 0})

# returns a pandas data frame
data_frame = analysis.time_series_analysis('mongoDate')

# store analysis results: same base name for the CSV and the JSON export
series_name = "perdayTimeSeriesPhishingCurrentInstance"
analysis.data_frame_to_csv(data_frame, series_name)
analysis.data_frame_to_json(data_frame, series_name)

# ---------------------------------------------------------------------------
# Export current MongoDB collection instance (disabled; uncomment to run)
# ---------------------------------------------------------------------------
# exploitDataBase = ExportCollectionData(collection=db.threats.phishtank, path=server_path)
# exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
# exploitDataBase.export_collection_to_json("dataset-phishing-current-instance")
#
# csv_header = ["URL", "Submitted-by", "Valid", "Online", "DatetimeUTC", "TimestampUTC",
#               "DatetimeUTC-CTI", "TimestampUTC-CTI", "Entity-type", "Category"]
# exploitDataBase.export_collection_to_csv("dataset-phishing-current-instance", csv_header)