"--password", type=str, help="password for connecting to hyperion gray api", required=True) args = parser.parse_args() print("Got arguments:", args) #url_cpe = "https://effect.hyperiongray.com/api/cpe/" if (args.date == "1970-01-01T00:00:00Z"): url_cpe = "https://effect.hyperiongray.com/api/cpe" #To get everything else: url_cpe = "https://effect.hyperiongray.com/api/cpe/updates/" + str( args.date) apiDownloader = APIDownloader(sc, sqlContext) page_num = 0 total_pages = 1 batch_size = 100 while page_num < total_pages: url_query = url_cpe + "/pages/" + str(page_num) + "?limit=" + str( batch_size) results_json = apiDownloader.download_api(url_query, "isi", args.password) if results_json is not None and "results" in results_json: results = results_json["results"] num_results = len(results) total_pages = results_json["total_pages"]
"feverclan.com", "floridabar.org", #No results "i-dressup.com", "jivesoftware.com", "justformen.com", #No result "Last.fm", "manaliveinc.org", "newseasims.com", "saintfrancis.com", "ssctech.com", #No result "unm.edu", #No result "usc.edu", #No result "wpcapital.com" ] apiDownloader = APIDownloader(sc, sqlContext) result_rdds = list() for domain in domains: results = apiDownloader.download_api(url + domain, "isi", args.password) if results is not None: if "results" in results: if len(results["results"]) > 0: rdd = sc.parallelize(results["results"]) apiDownloader.load_into_cdr(results["results"], "hg_leaked_source", args.team, "hg-leaked-source") result_rdds.append(rdd) if len(result_rdds) > 0:
parser.add_argument("-p", "--password", type=str, help="api key", required=False) args = parser.parse_args() print("Got arguments:", args) def write_output_to_file(file_name, result): out_file = open(file_name, 'w') for line in result: line = json.dumps(line, ensure_ascii=False) out_file.write(line + "\n") apiDownloader = APIDownloader(sc, sqlContext) if (args.date == "1970-01-01"): url = "http://cloudeffect02.isi.edu:5620?" else: url = "http://cloudeffect02.isi.edu:5620?start_date=" + str( args.date) + "&end_date=" + str(args.date) + "&" apiDownloader = APIDownloader(sc, sqlContext) page_num = 0 total_pages = 1 batch_size = 50 while page_num < total_pages: url_query = url + "start=" + str(page_num) + "&limit=" + str( batch_size)
def get_all_urls():
    date_filter = ''
    if args.date != "1970-01-01":
        date_filter = "from=" + args.date + "&to=" + args.date
    return {
        "zero-day-products": "https://apigargoyle.com/GargoyleApi/getZerodayProducts?order=scrapedDate&" + date_filter,
        "hacking-items": "https://apigargoyle.com/GargoyleApi/getHackingItems?order=scrapedDate&" + date_filter,
        "hacking-posts": "https://apigargoyle.com/GargoyleApi/getHackingPosts?order=scrapedDate&" + date_filter,
        "twitter": "https://apigargoyle.com/GargoyleApi/getTwitterData?" + date_filter,
        "exploit-db": "https://apigargoyle.com/GargoyleApi/getExploitDBData?" + date_filter,
        "dark-mentions": "https://apigargoyle.com/GargoyleApi/getDarkMentions?",  # + date_filter,
        "dark-mention-rules": "https://apigargoyle.com/GargoyleApi/getDarkMentionRules?" + date_filter
    }

apiDownloader = APIDownloader(sc, sqlContext)
urls = get_all_urls()
exception_occured = False
max_num_tries_per_call = 3
error = ""
for api_name in urls:
    try:
        source = args.team + "-" + api_name
        done = False
        start = 0
        max_limit = 1000
        while not done:
            paging_url = urls[api_name] + "&start=" + str(start) + "&limit=" + str(max_limit)
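            # --- Illustrative continuation (an assumption; the original loop
            # body is truncated here): retry each call up to
            # max_num_tries_per_call times, load whatever came back, and stop
            # paging after a failed or short page. The credentials and the
            # load_into_cdr arguments are placeholders, not taken from the
            # original file.
            results = None
            for attempt in range(max_num_tries_per_call):
                results = apiDownloader.download_api(paging_url, "isi", args.password)
                if results is not None:
                    break
            if results:
                apiDownloader.load_into_cdr(results, source, args.team, source)
            if results is None or len(results) < max_limit:
                done = True  # failed or short page: no more data for this API
            start += max_limit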
import json

'''
spark-submit --deploy-mode client \
    --py-files /home/hadoop/effect-workflows/lib/python-lib.zip \
    hgMSBulletin.py \
    --outputFolder <HDFS or s3 output folder> \
    --team "hyperiongray" \
    --password <PASSWORD> \
    --date 2016-10-02T12:00:00+00:00
'''

if __name__ == "__main__":
    sc = SparkContext()
    sqlContext = HiveContext(sc)
    apiDownloader = APIDownloader(sc, sqlContext)

    parser = ArgumentParser()
    parser.add_argument("-f", "--outputFolder", type=str,
                        help="Output foldername", required=True)
    parser.add_argument("-t", "--team", type=str,
                        help="Team Name", required=True)
    parser.add_argument("-d", "--date", type=str,
type=str, help="Greater than equal date", required=True) parser.add_argument("-p", "--password", type=str, help="password for connecting to hyperion gray api", required=True) args = parser.parse_args() print("Got arguments:", args) if (args.date == "1970-01-01T00:00:00+00:00"): url_zdi = "https://effect.hyperiongray.com/api/zdi/" else: timestamp = DateUtil.unix_timestamp(args.date, "%Y-%m-%dT%H:%M:%S%Z") url_zdi = "https://effect.hyperiongray.com/api/zdi/?query={\"date\":{\"$gte\": {\"$date\": " + str( timestamp) + "}}}" apiDownloader = APIDownloader(sc, sqlContext) results = apiDownloader.download_api(url_zdi, "isi", args.password) if results is not None: print "Downloaded ", len( results), " new ZDI data rows. Adding them to CDR" if len(results) > 0: rdd = sc.parallelize(results) rdd.map(lambda x: ("hg-zdi", json.dumps(x))).saveAsSequenceFile( args.outputFolder + "/hg-zdi") apiDownloader.load_into_cdr(results, "hg_zdi", args.team, "hg-zdi")
"--password", type=str, help="password for connecting to hyperion gray api", required=True) args = parser.parse_args() print("Got arguments:", args) #url_cpe = "https://effect.hyperiongray.com/api/cpe/" if (args.date == "1970-01-01T00:00:00Z"): url_conference = "https://effect.hyperiongray.com/api/conferences" #To get everything else: url_conference = "https://effect.hyperiongray.com/api/conferences/updates/" + str( args.date) apiDownloader = APIDownloader(sc, sqlContext) results = apiDownloader.download_api(url_conference, "isi", args.password) if results is not None: num_results = len(results) print "Downloaded ", len( results), " new conference data rows. Adding them to CDR" if num_results > 0: apiDownloader.load_into_cdr(results, "hg_conference", args.team, "hg-conference") batch_size = 5000 start = 0 while (start < num_results): end = start + batch_size if (end > num_results): end = num_results
'''
spark-submit --deploy-mode client \
    --py-files /home/hadoop/effect-workflows/lib/python-lib.zip \
    hgMSBulletin.py \
    --outputFolder <HDFS or s3 output folder> \
    --team "hyperiongray" \
    --password <PASSWORD> \
    --date 2016-10-02T12:00:00+00:00
'''

if __name__ == "__main__":
    sc = SparkContext()
    sqlContext = HiveContext(sc)
    apiDownloader = APIDownloader(sc, sqlContext)

    parser = ArgumentParser()
    parser.add_argument("-f", "--outputFolder", type=str,
                        help="Output foldername", required=True)
    parser.add_argument("-t", "--team", type=str,
                        help="Team Name", required=True)
    parser.add_argument("-d", "--date", type=str,
                        help="Greater than equal date", required=True)
    parser.add_argument("-p", "--password", type=str,
                        help="password for connecting to hyperion gray api",
                        required=True)

    args = parser.parse_args()
    print("Got arguments:", args)

    server = SSHTunnelForwarder('134.147.203.229',
                                ssh_username='******',
                                ssh_password=args.password,
                                remote_bind_address=('134.147.203.219', 18000),
                                local_bind_address=('127.0.0.1', 18000))
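    # --- Illustrative continuation (an assumption; the fragment ends once the
    # tunnel is constructed): sshtunnel forwarders are started explicitly and
    # stopped when done, and requests to the local bind address are forwarded
    # to the remote service. The endpoint URL and credentials below are
    # placeholders, not taken from the original file.
    server.start()
    try:
        # traffic to 127.0.0.1:18000 is forwarded to 134.147.203.219:18000
        results = apiDownloader.download_api("http://127.0.0.1:18000/",
                                             "isi", args.password)
    finally:
        server.stop()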