예제 #1
0
        # Tally the outcome of one update; `upd_res` is produced above this
        # excerpt (presumably a pymongo UpdateResult from an upsert - TODO confirm).
        docs_match_update += upd_res.matched_count
        docs_modified_update += upd_res.modified_count
        # A non-None upserted_id is counted as a newly inserted document.
        if upd_res.upserted_id is not None:
            docs_inserted += 1

    # Summary of the update pass, followed by an empty separator line.
    print("Documents Inserted: ", docs_inserted)
    print("Documents Matched Update Filter: ", docs_match_update)
    print("Documents Modified: ", docs_modified_update)
    print()
    """ Set Path Of Data Exportation """
    # to run on server
    server_path = '/var/www/html/saint/indicators2018/botnets/'
    # to run locally (not referenced anywhere in the visible lines)
    local_path = ""
    """ Descriptive Analysis and Result Exportation"""
    analysis = DescriptiveAnalysis(collection=db.threats.botscout,
                                   path=server_path)
    # Calling the instance runs the query against the collection.
    # NOTE(review): None is passed for both arguments here while the other
    # calls in this excerpt pass explicit dicts - confirm None is handled.
    analysis(query=None, projection=None)
    # returns a pandas data frame
    data_frame = analysis.time_series_analysis('mongoDate')
    # store analysis results (same base name for the CSV and the JSON export)
    analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesBotnetsDetailed")
    analysis.data_frame_to_json(data_frame, "perdayTimeSeriesBotnetsDetailed")

    # Lower bound for the "last 7 days" query below.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
    last_week = datetime.utcnow() - timedelta(days=7)
    # Second analysis object restricted to documents with mongoDate >= last_week.
    analysis_2 = DescriptiveAnalysis(collection=db.threats.botscout,
                                     path=server_path)
    analysis_2(query={'mongoDate': {"$gte": last_week}}, projection={"_id": 0})
    # Top 10 values of the "Country" field; the third argument looks like the
    # export name - verify against DescriptiveAnalysis.top_n.
    top_countries = analysis_2.top_n(10, "Country",
                                     'botnets-detailed-top-countries')
    print(top_countries)
    ''' Export current MongoDB collection instance '''
        # Tally the outcome of one update; `upd_res` is produced above this
        # excerpt (presumably a pymongo UpdateResult from an upsert - TODO confirm).
        docs_match_update += upd_res.matched_count
        docs_modified_update += upd_res.modified_count
        # A non-None upserted_id is counted as a newly inserted document.
        if upd_res.upserted_id is not None:
            docs_inserted += 1

    # Summary of the update pass, followed by an empty separator line.
    print("Documents Inserted: ", docs_inserted)
    print("Documents Matched Update Filter: ", docs_match_update)
    print("Documents Modified: ", docs_modified_update)
    print()
    """ Set Path Of Data Exportation """
    # to run on server
    server_path = '/var/www/html/saint/indicators2018/botnets/'
    # to run locally (not referenced anywhere in the visible lines)
    local_path = ""
    """ Descriptive Analysis and Result Exportation """
    analysis = DescriptiveAnalysis(collection=db.threats.ipmasterlist,
                                   path=server_path)
    # Calling the instance runs the query: all documents, `_id` excluded.
    analysis(query={}, projection={"_id": 0})
    # returns a pandas data frame
    data_frame = analysis.time_series_analysis('mongoDate')
    # store analysis results (same base name for the CSV and the JSON export)
    analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesBotnets")
    analysis.data_frame_to_json(data_frame, "perdayTimeSeriesBotnets")

    # Lower bound for the "last 7 days" query below.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
    last_week = datetime.utcnow() - timedelta(days=7)
    # Second analysis object restricted to documents with mongoDate >= last_week.
    analysis_2 = DescriptiveAnalysis(collection=db.threats.ipmasterlist,
                                     path=server_path)
    analysis_2(query={'mongoDate': {"$gte": last_week}}, projection={"_id": 0})
    # Top 10 values of the "IP" field; the return value is discarded here, so
    # the call is made only for whatever top_n itself writes out.
    analysis_2.top_n(10, "IP", "botnets-top-ips")
    ''' Export current MongoDB collection instance '''
    # ipmasterlist_collection = ExportCollectionData(collection=db.threats.ipmasterlist, path=server_path)
    # ipmasterlist_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
 # Refresh the threats.malwaredomains collection from a fresh crawl, then
 # export a time-series analysis and the top subcategories.
 # crawl indicator (URL is defined outside this excerpt)
 html_links = crawl_malware_domains(URL)
 # scrape data from links
 data = scrape_and_model_content(html_links)
 ''' Store instance of scraped data in MongoDB '''
 db = connect_to_mongodb()
 # The collection is replaced wholesale: dropped first, then re-populated.
 # NOTE(review): the drop happens before the insert; pymongo's insert_many
 # rejects an empty document list, so an empty scrape would leave the
 # collection empty and abort the run - confirm this is acceptable.
 db.threats.malwaredomains.drop()
 res = db.threats.malwaredomains.insert_many(data)
 print("Documents Inserted: ", len(res.inserted_ids))
 """ Set Path Of Data Exportation """
 # to run on server
 server_path = '/var/www/html/saint/indicators2018/malware/'
 # to run locally (not referenced anywhere in the visible lines)
 local_path = ""
 """ Descriptive Analysis and Result Exportation"""
 analysis = DescriptiveAnalysis(collection=db.threats.malwaredomains,
                                path=server_path)
 # Calling the instance runs the query: all documents, `_id` excluded.
 analysis(query={}, projection={'_id': 0})
 # returns a pandas data frame
 # NOTE(review): the per-month variant is used here, yet the results are
 # stored under a "perday..." name below - confirm the file name is intended.
 data_frame = analysis.time_series_analysis_per_month('mongoDate')
 # store analysis results (same base name for the CSV and the JSON export)
 analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesMalware")
 analysis.data_frame_to_json(data_frame, "perdayTimeSeriesMalware")
 # Top 5 values of the "Subcategory" field, echoed to stdout.
 top5 = analysis.top_n(5, "Subcategory", "malware-top-subcategories")
 print(top5)
 ''' Export current MongoDB collection instance '''
 # malwaredomains_collection = ExportCollectionData(collection=db.threats.malwaredomains, path=server_path)
 # malwaredomains_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
 # malwaredomains_collection.export_collection_to_json("dataset-malware")
 #
 # csv_header = ["Category", "Entity-Type", "Domain", "Subcategory", "Submitted-By", "TimestampUTC", "DatetimeUTC", "TimestampUTC-CTI", "DatetimeUTC-CTI", "State"]
 # malwaredomains_collection.export_collection_to_csv("dataset-malware", csv_header)
    # Copy every document from `cursor` into threats.phishtank_repository.
    # `cursor`, `db`, `server_path` and the three counters are set up above
    # this excerpt.
    for doc in cursor:
        try:
            # Upsert keyed on the document's own _id: an existing document gets
            # its fields overwritten via $set, a missing one is inserted.
            upd_res = db.threats.phishtank_repository.update_one(
                {"_id": doc["_id"]}, {"$set": doc}, upsert=True)
            docs_match_update += upd_res.matched_count
            docs_modified_update += upd_res.modified_count
            # A non-None upserted_id is counted as a newly inserted document.
            if upd_res.upserted_id is not None:
                docs_inserted += 1
        except OperationFailure as e:
            # Best effort: report the failure and continue with the next document.
            print("Error in mongoDB operation > ", e)

    # Summary of the upsert pass.
    print("Documents Inserted: ", docs_inserted)
    print("Documents Matched Update Filter: ", docs_match_update)
    print("Documents Modified: ", docs_modified_update)
    """ Descriptive Analysis for Phishing Repository"""
    analysis = DescriptiveAnalysis(collection=db.threats.phishtank_repository,
                                   path=server_path)
    # Calling the instance runs the query: all documents, `_id` excluded.
    analysis(query={}, projection={"_id": 0})
    # Time Series Analysis
    data_frame = analysis.time_series_analysis('mongoDate')
    analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesPhishing")
    analysis.data_frame_to_json(data_frame, "perdayTimeSeriesPhishing")
    # Top Author Categorical Analysis
    # Lower bound for the "last 7 days" query below.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
    last_week = datetime.utcnow() - timedelta(days=7)
    analysis_2 = DescriptiveAnalysis(
        collection=db.threats.phishtank_repository, path=server_path)
    analysis_2(query={'mongoDate': {"$gte": last_week}}, projection={"_id": 0})
    # Top 5 values of the "Submitted-by" field, echoed to stdout.
    top5 = analysis_2.top_n(5, "Submitted-by", "phishing-top-submitters")
    print(top5)
    ''' Export Current MongoDB Collection Instance '''
    # exploitDataBase = ExportCollectionData(collection=db.threats.phishtank_repository, path=server_path)
    # exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
예제 #5
0
 # Replace the threats.ransomware collection with the freshly collected data
 # and export top-N breakdowns. `db` and `total_list_data` are defined above
 # this excerpt.
 # start, end = today_datetime()
 # # delete today's data
 # print(start, end)
 # res_d = db.threats.ransomware.delete_many({"mongoDate-CTI": {"$gte": start, "$lt": end}})
 # print("Documents Deleted: ", res_d.deleted_count)
 # The whole collection is dropped (the per-day delete above is disabled).
 # NOTE(review): the drop happens before the insert; pymongo's insert_many
 # rejects an empty document list, so empty input would leave the collection
 # empty and abort the run - confirm this is acceptable.
 db.threats.ransomware.drop()
 # import recent instance
 res = db.threats.ransomware.insert_many(total_list_data)
 print("Documents Inserted: ", len(res.inserted_ids))
 """ Set Path Of Data Exportation """
 # to run on server
 server_path = '/var/www/html/saint/indicators2018/ransomware/'
 # to run locally (not referenced anywhere in the visible lines)
 local_path = ""
 """ Descriptive Analysis and Result Exportation"""
 analysis = DescriptiveAnalysis(collection=db.threats.ransomware,
                                path=server_path)
 # call object to query collection. No arguments chooses default parameters {}
 # returns num of doc results
 analysis(query={}, projection={"_id": 0})
 # returns a pandas data frame
 # ransomware_data_frame = analysis.time_series_analysis('mongoDate-CTI')
 # store analysis results
 # analysis.data_frame_to_csv(ransomware_data_frame, "perdayTimeSeriesRansomware")
 # analysis.data_frame_to_json(ransomware_data_frame, "perdayTimeSeriesRansomware")
 # Three top-N breakdowns by field: "Entity-Type", "Scope" and "Subcategory".
 top3_entity_types = analysis.top_n(3, "Entity-Type",
                                    'ransomware-top-entity-types')
 top5_scope = analysis.top_n(5, "Scope", 'ransomware-top-scope')
 top5_subcategories = analysis.top_n(5, "Subcategory",
                                     "ransomware-top-subcategories")
 # NOTE(review): top5_subcategories is not printed in the visible lines; the
 # excerpt may simply end before that print - verify.
 print("Top Entity Type: ", top3_entity_types)
 print("Top Scope: ", top5_scope)
 # Re-import the current window of web-based-attack records and export a
 # time-series analysis. `db`, `start`, `end` and `json_data` are defined
 # above this excerpt.
 # delete today's data
 # Removes documents whose "mongoDate-CTI" lies in the half-open range
 # [start, end).
 db.threats.webBasedAttacks1.delete_many(
     {"mongoDate-CTI": {
         "$gte": start,
         "$lt": end
     }})
 # store most recent today's data
 # NOTE(review): the delete above has already run at this point; pymongo's
 # insert_many rejects an empty document list, so empty json_data would abort
 # the run with the window's records gone - confirm this is acceptable.
 res = db.threats.webBasedAttacks1.insert_many(json_data)
 print("Documents Inserted: ", len(res.inserted_ids))
 """ Set Path Of Data Exportation """
 # to run on server
 server_path = '/var/www/html/saint/indicators2018/web-based-attacks/'
 # to run locally (not referenced anywhere in the visible lines)
 local_path = ""
 """ Descriptive Analysis and Result Exportation"""
 analysis = DescriptiveAnalysis(collection=db.threats.webBasedAttacks1,
                                path=server_path)
 # Calling the instance runs the query: all documents, `_id` excluded.
 analysis(query={}, projection={'_id': 0})
 # returns a pandas data frame
 data_frame = analysis.time_series_analysis('mongoDate-CTI')
 # store analysis results (same base name for the CSV and the JSON export)
 analysis.data_frame_to_csv(data_frame, "perdayTimeSeriesWebBasedAttacks")
 analysis.data_frame_to_json(data_frame, "perdayTimeSeriesWebBasedAttacks")
 # top10 = analysis.top_n(10,"IP", "web-based-attacks-top-IPs")
 # print(top10)
 ''' Export current MongoDB collection instance '''
 # webBasedAttacks_collection = ExportCollectionData(collection=db.threats.webBasedAttacks1, path=server_path)
 # webBasedAttacks_collection(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
 # webBasedAttacks_collection.export_collection_to_json("dataset-webBasedAttacks")
 #
 # csv_header = ["IP", "Category", "Entity-Type", "TimestampUTC-CTI", "DatetimeUTC-CTI"]
 # webBasedAttacks_collection.export_collection_to_csv("dataset-webBasedAttacks", csv_header)
if __name__ == "__main__":
    # Script entry point: hand the PhishTank search URL template to
    # crawl_site(), then run a time-series analysis on 'mongoDate' over the
    # threats.phishtank collection and store it as CSV and JSON.

    # Imported here so the block stays self-contained; only this line needs it.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. An aware UTC "now"
    # renders to exactly the same text with this format string.
    print("Report on",
          datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), '\n')
    # parsing {0} to the string for iteration ({0} is the page placeholder)
    indicator = 'https://www.phishtank.com/phish_search.php?page={0}&active=y&verified=u'

    # ''' Crawl Phishtank'''
    # crawl_site() receives the DB handle and the URL template; what it stores
    # is defined elsewhere in the file.
    db = connect_to_mongodb()
    crawl_site(db, indicator)
    """ Set Path Of Data Exportation """
    # to run on server
    server_path = '/var/www/html/saint/indicators2018/phishing/'
    # to run locally (kept for manual switching; unused in the visible lines)
    local_path = ""
    """ Descriptive Analysis and Result Exportation"""
    analysis = DescriptiveAnalysis(collection=db.threats.phishtank,
                                   path=server_path)
    # Calling the instance runs the query: all documents, `_id` excluded.
    analysis(query={}, projection={"_id": 0})
    # returns a pandas data frame
    data_frame = analysis.time_series_analysis('mongoDate')
    # store analysis results (same base name for the CSV and the JSON export)
    analysis.data_frame_to_csv(data_frame,
                               "perdayTimeSeriesPhishingCurrentInstance")
    analysis.data_frame_to_json(data_frame,
                                "perdayTimeSeriesPhishingCurrentInstance")
    ''' Export current MongoDB collection instance '''
    # exploitDataBase = ExportCollectionData(collection=db.threats.phishtank, path=server_path)
    # exploitDataBase(query={}, projection={"_id": 0, "mongoDate": 0, "mongoDate-CTI": 0})
    # exploitDataBase.export_collection_to_json("dataset-phishing-current-instance")
    #
    # csv_header = ["URL", "Submitted-by", "Valid", "Online", "DatetimeUTC", "TimestampUTC",
    #                       "DatetimeUTC-CTI", "TimestampUTC-CTI", "Entity-type", "Category"]