Example #1
                        "--password",
                        type=str,
                        help="password for connecting to hyperion gray api",
                        required=True)

    args = parser.parse_args()
    print("Got arguments:", args)

    #url_cpe = "https://effect.hyperiongray.com/api/cpe/"
    if (args.date == "1970-01-01T00:00:00Z"):
        url_cpe = "https://effect.hyperiongray.com/api/cpe"  #To get everything
    else:
        url_cpe = "https://effect.hyperiongray.com/api/cpe/updates/" + str(
            args.date)

    apiDownloader = APIDownloader(sc, sqlContext)

    page_num = 0
    total_pages = 1
    batch_size = 100

    while page_num < total_pages:
        url_query = url_cpe + "/pages/" + str(page_num) + "?limit=" + str(
            batch_size)
        results_json = apiDownloader.download_api(url_query, "isi",
                                                  args.password)

        if results_json is not None and "results" in results_json:
            results = results_json["results"]
            num_results = len(results)
            total_pages = results_json["total_pages"]
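The snippet is cut off inside the loop body. A minimal sketch of how such a paging loop is typically completed, assuming a load_into_cdr call and the "hg_cpe"/"hg-cpe" names by analogy with the other examples on this page:

            # Sketch, not original source: push the batch into the CDR,
            # then advance to the next page ("hg_cpe"/"hg-cpe" are assumed names).
            apiDownloader.load_into_cdr(results, "hg_cpe", args.team, "hg-cpe")
        page_num += 1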
        "feverclan.com",
        "floridabar.org",  #No results
        "i-dressup.com",
        "jivesoftware.com",
        "justformen.com",  #No result
        "Last.fm",
        "manaliveinc.org",
        "newseasims.com",
        "saintfrancis.com",
        "ssctech.com",  #No result
        "unm.edu",  #No result
        "usc.edu",  #No result
        "wpcapital.com"
    ]

    apiDownloader = APIDownloader(sc, sqlContext)

    result_rdds = list()
    for domain in domains:
        results = apiDownloader.download_api(url + domain, "isi",
                                             args.password)
        if results is not None and "results" in results and len(results["results"]) > 0:
            rdd = sc.parallelize(results["results"])
            apiDownloader.load_into_cdr(results["results"],
                                        "hg_leaked_source", args.team,
                                        "hg-leaked-source")
            result_rdds.append(rdd)

    if len(result_rdds) > 0:
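The example is cut off at this condition. A plausible body, following the sequence-file save pattern from Example #6 (the output key, path, and the availability of json are assumptions):

        # Sketch, not original source: union the per-domain RDDs and persist them
        sc.union(result_rdds).map(lambda x: ("hg-leaked-source", json.dumps(x))) \
            .saveAsSequenceFile(args.outputFolder + "/hg-leaked-source")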
Example #3
    parser.add_argument("-p",
                        "--password",
                        type=str,
                        help="api key",
                        required=False)

    args = parser.parse_args()
    print("Got arguments:", args)

    def write_output_to_file(file_name, result):
        # Serialize each record as one JSON line; 'with' guarantees the file is closed.
        with open(file_name, 'w') as out_file:
            for line in result:
                out_file.write(json.dumps(line, ensure_ascii=False) + "\n")

    apiDownloader = APIDownloader(sc, sqlContext)
    if (args.date == "1970-01-01"):
        url = "http://cloudeffect02.isi.edu:5620?"
    else:
        url = "http://cloudeffect02.isi.edu:5620?start_date=" + str(
            args.date) + "&end_date=" + str(args.date) + "&"

    page_num = 0
    total_pages = 1
    batch_size = 50

    while page_num < total_pages:
        url_query = url + "start=" + str(page_num) + "&limit=" + str(
            batch_size)
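The source cuts off after building the query URL. A minimal sketch of a loop body that reuses download_api and the write_output_to_file helper defined above (the local filename and the paging contract are assumptions):

        results_json = apiDownloader.download_api(url_query, "isi", args.password)
        if results_json is not None and "results" in results_json:
            total_pages = results_json["total_pages"]  # assumes the paging contract from Example #1
            write_output_to_file("page_" + str(page_num) + ".json",
                                 results_json["results"])  # hypothetical filename
        page_num += 1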
Example #4
    def get_all_urls():
        date_filter = ''
        if (args.date != "1970-01-01"):
            date_filter = "from=" + args.date + "&to=" + args.date
        return {
            "zero-day-products": "https://apigargoyle.com/GargoyleApi/getZerodayProducts?order=scrapedDate&" + date_filter,
            "hacking-items":  "https://apigargoyle.com/GargoyleApi/getHackingItems?order=scrapedDate&" + date_filter,
            "hacking-posts": "https://apigargoyle.com/GargoyleApi/getHackingPosts?order=scrapedDate&" + date_filter,
            "twitter": "https://apigargoyle.com/GargoyleApi/getTwitterData?" + date_filter,
            "exploit-db": "https://apigargoyle.com/GargoyleApi/getExploitDBData?" + date_filter,
            "dark-mentions": "https://apigargoyle.com/GargoyleApi/getDarkMentions?", #+ date_filter,
            "dark-mention-rules": "https://apigargoyle.com/GargoyleApi/getDarkMentionRules?" + date_filter
        }

    apiDownloader = APIDownloader(sc, sqlContext)
    urls = get_all_urls()

    exception_occured = False
    max_num_tries_per_call = 3

    error = ""
    for api_name in urls:
        try:
            source = args.team + "-" + api_name
            done = False
            start = 0
            max_limit = 1000

            while done is False:
                paging_url = urls[api_name] + "&start=" + str(start) + "&limit=" + str(max_limit)
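exception_occured and max_num_tries_per_call are set above but never used in the visible code. A sketch of one way they could drive the paging loop (the "isi" username mirrors the other examples and is an assumption here):

                # Sketch, not original source: fetch one page, retrying up to
                # max_num_tries_per_call times before giving up.
                results = None
                for _ in range(max_num_tries_per_call):
                    results = apiDownloader.download_api(paging_url, "isi", args.password)
                    if results is not None:
                        break
                if results is None:
                    exception_occured = True
                    done = True
                elif len(results) < max_limit:
                    done = True  # short page: last page reached
                else:
                    start += max_limit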
Example #5
import json
'''
spark-submit --deploy-mode client \
    --py-files /home/hadoop/effect-workflows/lib/python-lib.zip \
    hgMSBulletin.py \
    --outputFolder <HDFS or s3 output folder> \
    --team "hyperiongray" \
    --password <PASSWORD> \
    --date 2016-10-02T12:00:00+00:00
'''
if __name__ == "__main__":

    sc = SparkContext()
    sqlContext = HiveContext(sc)

    apiDownloader = APIDownloader(sc, sqlContext)

    parser = ArgumentParser()
    parser.add_argument("-f",
                        "--outputFolder",
                        type=str,
                        help="Output foldername",
                        required=True)
    parser.add_argument("-t",
                        "--team",
                        type=str,
                        help="Team Name",
                        required=True)
    parser.add_argument("-d",
                        "--date",
                        type=str,
                        help="Greater than equal date",
                        required=True)
Example #6
    parser.add_argument("-d",
                        "--date",
                        type=str,
                        help="Greater than equal date",
                        required=True)
    parser.add_argument("-p",
                        "--password",
                        type=str,
                        help="password for connecting to hyperion gray api",
                        required=True)

    args = parser.parse_args()
    print("Got arguments:", args)

    if (args.date == "1970-01-01T00:00:00+00:00"):
        url_zdi = "https://effect.hyperiongray.com/api/zdi/"
    else:
        timestamp = DateUtil.unix_timestamp(args.date, "%Y-%m-%dT%H:%M:%S%Z")
        url_zdi = "https://effect.hyperiongray.com/api/zdi/?query={\"date\":{\"$gte\": {\"$date\": " + str(
            timestamp) + "}}}"

    apiDownloader = APIDownloader(sc, sqlContext)

    results = apiDownloader.download_api(url_zdi, "isi", args.password)
    if results is not None:
        print "Downloaded ", len(
            results), " new ZDI data rows. Adding them to CDR"
        if len(results) > 0:
            rdd = sc.parallelize(results)
            rdd.map(lambda x: ("hg-zdi", json.dumps(x))).saveAsSequenceFile(
                args.outputFolder + "/hg-zdi")
            apiDownloader.load_into_cdr(results, "hg_zdi", args.team, "hg-zdi")
Example #7
                        "--password",
                        type=str,
                        help="password for connecting to hyperion gray api",
                        required=True)

    args = parser.parse_args()
    print("Got arguments:", args)

    #url_cpe = "https://effect.hyperiongray.com/api/cpe/"
    if (args.date == "1970-01-01T00:00:00Z"):
        url_conference = "https://effect.hyperiongray.com/api/conferences"  #To get everything
    else:
        url_conference = "https://effect.hyperiongray.com/api/conferences/updates/" + str(
            args.date)

    apiDownloader = APIDownloader(sc, sqlContext)

    results = apiDownloader.download_api(url_conference, "isi", args.password)
    if results is not None:
        num_results = len(results)
        print "Downloaded ", len(
            results), " new conference data rows. Adding them to CDR"
        if num_results > 0:
            apiDownloader.load_into_cdr(results, "hg_conference", args.team,
                                        "hg-conference")
            batch_size = 5000
            start = 0
            while (start < num_results):
                end = start + batch_size
                if (end > num_results):
                    end = num_results
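The batching loop is cut off after clamping end. A plausible completion that saves each slice as a sequence file, mirroring Example #6 (the per-batch output path is an assumption):

                # Sketch, not original source: persist this batch, then advance
                rdd = sc.parallelize(results[start:end])
                rdd.map(lambda x: ("hg-conference", json.dumps(x))).saveAsSequenceFile(
                    args.outputFolder + "/hg-conference/" + str(start))
                start = end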
Example #8
'''
spark-submit --deploy-mode client \
    --py-files /home/hadoop/effect-workflows/lib/python-lib.zip \
    hgMSBulletin.py \
    --outputFolder <HDFS or s3 output folder> \
    --team "hyperiongray" \
    --password <PASSWORD> \
    --date 2016-10-02T12:00:00+00:00
'''
if __name__ == "__main__":

    sc = SparkContext()
    sqlContext = HiveContext(sc)

    apiDownloader = APIDownloader(sc, sqlContext)

    parser = ArgumentParser()
    parser.add_argument("-f", "--outputFolder", type=str, help="Output foldername", required=True)
    parser.add_argument("-t", "--team", type=str, help="Team Name", required=True)
    parser.add_argument("-d", "--date", type=str, help="Greater than equal date", required=True)
    parser.add_argument("-p", "--password", type=str, help="password for connecting to hyperion gray api", required=True)

    args = parser.parse_args()
    print ("Got arguments:", args)

    server = SSHTunnelForwarder('134.147.203.229',
                                ssh_username='******',
                                ssh_password=args.password,
                                remote_bind_address=('134.147.203.219', 18000),
                                local_bind_address=('127.0.0.1', 18000))
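The snippet ends once the tunnel is constructed; with the sshtunnel package the forwarder is then started and stopped explicitly (or used as a context manager):

    server.start()
    # traffic to 127.0.0.1:18000 is now forwarded to 134.147.203.219:18000
    # ... call the API through the tunnel here ...
    server.stop()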