Example #1
 def pushToAzure(self, object_name):
     # push to azure
     if os.path.isfile("output/{}.csv".format(object_name)):
         try:
             self.__block_blob_service.create_blob_from_path(
                 Config.AZURE_CONTAINER_NAME,
                 Config.AZURE_FOLDER_NAME + "/{}.csv".format(object_name),
                 "output/{}.csv".format(object_name))
             print("^^^^^^ {} object pushed to Azure Storage Blob".format(
                 object_name))
         except Exception as inst:
             print(inst)
             IOHelper.appendToLog("azure_error.log", "\n\n{}".format(inst))
 def createJob(self, objectName, fieldList):
     print("###### Sending retrieve request: {} ".format(objectName))
     try:
         job_id, batch_id = self.__bulkHelper.createNormalBatch(
             objectName, fieldList)
         self.norm_job_batch_list.append({"job": job_id, "batch": batch_id})
         self.norm_job_download_list.append(job_id)
         self.norm_job_object_dict[job_id] = objectName
         IOHelper.appendToLog(
             "norm_jobs.log",
             "\nobject: {}, job_id: {}, batch_id, {}".format(
                 objectName, job_id, batch_id))
     except Exception as inst:
         print(inst)
         IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
Example #3
 def createJob(self, objectName, fieldList):
     print("###### Sending retrieve request: {} ".format(objectName))
     try:
         job_id, main_batch_id = self.__bulkHelper.createChunkBatch(
             objectName, fieldList)
         self.chunk_job_mainbatch_list.append({
             "job": job_id,
             "batch": main_batch_id
         })
         self.chunk_job_object_dict[job_id] = objectName
         IOHelper.appendToLog(
             "chunk_job.log",
             "\nobject: {}, job_id: {}, main_batch_id, {}".format(
                 objectName, job_id, main_batch_id))
     except Exception as inst:
         print(inst)
         IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
 def _check_norm_batch_status(self):
     while len(self.__normalHelper.norm_job_batch_list) > 0:
         print("\n\n### Checking norm batch status...")
         print("norm_job_batch_list: {}".format(
             self.__normalHelper.norm_job_batch_list))
         for job_batch_dict in self.__normalHelper.norm_job_batch_list[:]:
             j_id = job_batch_dict['job']
             b_id = job_batch_dict['batch']
             try:
                 b_status = self.__bulkHelper.getBatchStatus(
                     b_id, j_id, True)
                 b_state = b_status['state']
                 object_name = self.__normalHelper.norm_job_object_dict[j_id]
                 print(
                     "\n# norm job: {} (object: {}), batch: {} status: {}".
                     format(j_id, object_name, b_id, b_state))
                 print("# full status result:")
                 print(b_status)
                 if b_state == "Completed":
                     result_id = self.__bulkHelper.getQueryBatchResultIds(
                         b_id, j_id)[0]
                     self.__normalHelper.norm_result_list.append({
                         "job": j_id,
                         "batch": b_id,
                         "result": result_id
                     })
                     self.__normalHelper.norm_job_batch_list.remove(
                         job_batch_dict)
                 elif b_state == "Failed":
                     IOHelper.appendToLog(
                         "extract_error.log",
                         "\n\n# norm job: {} (object: {}), batch: {} status: {}"
                         .format(j_id, object_name, b_id, b_state))
                     self.__normalHelper.norm_job_batch_list.remove(
                         job_batch_dict)
             except Exception as inst:
                 print(inst)
                 IOHelper.appendToLog("api_error.log",
                                      "\n\n{}".format(inst))
             time.sleep(0.5)
         time.sleep(15)
 def _download_norm_batch_result(self):
     while len(self.__normalHelper.norm_job_download_list) > 0:
         print("\n\n### Checking download tasks...")
         print("## norm_job_download_list (To Do): {}".format(
             self.__normalHelper.norm_job_download_list))
         print("## norm_result_list (Ready To Do): {}".format(
             self.__normalHelper.norm_result_list))
         if len(self.__normalHelper.norm_result_list) > 0:
             for result_dict in self.__normalHelper.norm_result_list[:]:
                 j_id = result_dict['job']
                 b_id = result_dict['batch']
                 r_id = result_dict['result']
                 object_name = self.__normalHelper.norm_job_object_dict[j_id]
                 if j_id in self.__normalHelper.norm_job_download_list:
                     print("# downloading job: {} - batch: {} - result: {}".
                           format(j_id, b_id, r_id))
                     try:
                         raw = self.__bulkHelper.getQueryBatchResults(
                             b_id, r_id, j_id, raw=True)
                         str_output = raw.read(decode_content=True).decode(
                             "utf-8", "replace")
                         # save to a file
                         # with open("output/{}.csv".format(object_name), "w") as text_file:
                         # 	text_file.write(str_output)
                         IOHelper.outputObjectToFile(
                             object_name, str_output)
                         self.__normalHelper.norm_job_download_list.remove(
                             j_id)
                         self.__normalHelper.norm_result_list.remove(
                             result_dict)
                         self.__bulkHelper.closeJob(j_id)
                         print("$$$$$$ {} objected downloaded!".format(
                             object_name))
                     except Exception as inst:
                         print(inst)
                         IOHelper.appendToLog("api_error.log",
                                              "\n\n{}".format(inst))
                     # push to azure
                     if Config.PUSH_TO_AZURE:
                         self.__azureHelper.pushToAzure(object_name)
                 time.sleep(0.5)
         time.sleep(15)
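The `__bulkHelper` wrapped by these methods drives the Salesforce Bulk API: create a query job and batch, poll the batch state, then fetch the result set by id. A rough sketch of the same poll-and-download cycle using the open-source salesforce_bulk package (credentials and the SOQL query are placeholders, and the project wrapper's exact method names differ):

import time

from salesforce_bulk import SalesforceBulk

bulk = SalesforceBulk(username="user@example.com",   # placeholder credentials
                      password="secret",
                      security_token="token")
job = bulk.create_query_job("Account", contentType='CSV')
batch = bulk.query(job, "SELECT Id, Name FROM Account")

# poll until the batch completes, mirroring the 15 s sleep above
while not bulk.is_batch_done(batch):
    time.sleep(15)

# each result is a file-like object holding a CSV chunk
for result in bulk.get_all_results_for_query_batch(batch, job):
    print(result.read().decode("utf-8", "replace"))
bulk.close_job(job)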
Example #6
 def _check_chunk_main_batch_status(self):
     while len(self.__chunkHelper.chunk_job_mainbatch_list) > 0:
         print("\n\n### Checking chunk main batch status...")
         print("# chunk_job_mainbatch_list: {}".format(
             self.__chunkHelper.chunk_job_mainbatch_list))
         for job_batch_dict in self.__chunkHelper.chunk_job_mainbatch_list[:]:
             j_id, b_id = job_batch_dict['job'], job_batch_dict['batch']
             try:
                 b_state = self.__bulkHelper.getBatchStatus(
                     b_id, j_id, True)['state']
                 object_name = self.__chunkHelper.chunk_job_object_dict[j_id]
                 print("# chunk job: {} (object: {}), batch: {} status: {}".
                       format(j_id, object_name, b_id, b_state))
                 if b_state == "NotProcessed":
                     self.chunk_notprocessed_job_list.append(j_id)
                     self.__chunkHelper.chunk_job_mainbatch_list.remove(
                         job_batch_dict)
                     print("# chunk_notprocessed_job_list: {}".format(
                         ', '.join(self.chunk_notprocessed_job_list)))
                     download_chunk_batch_result_thread = DownloadChunkBatchResultThread(
                         "Download Chunk Batch Result Thread", object_name,
                         j_id, b_id)
                     download_chunk_batch_result_thread.start()
                 elif b_state == "Failed":
                     IOHelper.appendToLog(
                         "extract_error.log",
                         "\n\n# chunk job: {} (object: {}), batch: {} status: {}"
                         .format(j_id, object_name, b_id, b_state))
                     self.__chunkHelper.chunk_job_mainbatch_list.remove(
                         job_batch_dict)
             except Exception as inst:
                 print(inst)
                 IOHelper.appendToLog("api_error.log",
                                      "\n\n{}".format(inst))
             time.sleep(1)
         time.sleep(40)
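The "NotProcessed" state is how Salesforce marks the original batch once PK chunking takes over: the server splits the query into auto-created sub-batches and leaves the main batch unprocessed. Chunking is requested when the job is created via the Sforce-Enable-PKChunking header; with the salesforce_bulk package that roughly looks like this (values are placeholders):

from salesforce_bulk import SalesforceBulk

bulk = SalesforceBulk(username="user@example.com",   # placeholder credentials
                      password="secret",
                      security_token="token")
# pk_chunking sets the Sforce-Enable-PKChunking header on the job request
job = bulk.create_query_job("Account", contentType='CSV',
                            pk_chunking="chunkSize=100000")
# the main batch returned by bulk.query() will move to "NotProcessed";
# the data arrives in the auto-created sub-batches, as polled above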
Example #7
    def _download_chunk_batch_result(self, object_name, job, main_batch):
        # wait until main_batch become "NotProcessed"
        while True:
            try:
                main_batch_state = self.__bulkHelper.getBatchStatus(
                    main_batch, job, True)['state']
                print(
                    "\n# Checking chunk job: {} (object: {}) - main_batch status: {}"
                    .format(job, object_name, main_batch_state))
                if main_batch_state == "NotProcessed":
                    break
                else:
                    time.sleep(40)
            except Exception as inst:
                print(inst)
                IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        # wait until all the sub-batches become "Completed"
        while True:
            try:
                sub_batches = self.__bulkHelper.getBatchList(job)
                can_break = True
                print(
                    "\n# Checking chunk job: {} (object: {}) - sub_batch status:"
                    .format(job, object_name))
                for b in sub_batches:
                    if b['id'] != main_batch:
                        print("sub_batch: " + b['id'] + " - state: " +
                              b['state'])
                        if b['state'] != "Completed":
                            can_break = False
                if can_break:
                    print("All sub_batches completed!")
                    break
                else:
                    time.sleep(40)
            except Exception as inst:
                print(inst)
                IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        try:
            sub_batch_ids = [
                b['id'] for b in self.__bulkHelper.getBatchList(job)
                if b['id'] != main_batch
            ]
            sub_batch_ids.sort()
            # retrieve data from batch
            for b in sub_batch_ids:
                r_ids = self.__bulkHelper.getQueryBatchResultIds(b, job)
                for r in r_ids:
                    raw = self.__bulkHelper.getQueryBatchResults(
                        b, r, job, raw=True)
                    str_output = raw.read(decode_content=True).decode(
                        "utf-8", "replace")
                    print("Writing {} to file.".format(r))
                    # save to a file
                    IOHelper.outputObjectToFile(object_name, str_output)
        except Exception as inst:
            print(inst)
            IOHelper.appendToLog("api_error.log", "\n\n{}".format(inst))
        self.__bulkHelper.closeJob(job)
        print("$$$$$$ {} object downloaded!".format(object_name))

        # push to azure
        if Config.PUSH_TO_AZURE:
            self.__azureHelper.pushToAzure(object_name)
Example #8
import sys
import time

from GitHelper import GitHelper  # assumed project-local module path
from IOHelper import IOHelper


def main():

    if len(sys.argv) != 2:
        print("USAGE: {} path/to/git/repo/file/toscan".format(
            sys.argv[0].split('/')[-1]))
        return

    IOhelper = IOHelper()

    repoList = IOhelper.getreposfromFile(sys.argv[1])
    IOhelper.createOutputDirectory()
    IOhelper.write_output_headers()

    script_start_t = time.time()
    for repoURL in repoList:
        print("Preparing to scan repo {}".format(repoURL))
        IOhelper.write_report_line(repoURL)
        outDirName = IOhelper.getDirName(repoURL)
        gitHelper = GitHelper(repoURL)
        print("Creating necessary dirs")
        IOhelper.createRepoDirectory(outDirName)
        cloning_start_t = time.time()
        if not gitHelper.isgitRepo(outDirName):
            gitHelper.cloneRepo(outDirName)
        else:
            # force fetch all from repo to ensure working with latest
            gitHelper.fetch_all()
        cloning_end_t = time.time()
        IOhelper.write_report_line(
            "REPO CLONING TIME: ",
            "{}s".format(cloning_end_t - cloning_start_t))
        scanning_start_t = time.time()
        gitHelper.mine_repo(IOhelper.writeCVEentry, IOhelper.write_report_line,
                            IOhelper.record_non_vuln_commit)
        scanning_end_t = time.time()
        IOhelper.write_report_line(
            "REPO SCANNING TIME",
            "{}s".format(scanning_end_t - scanning_start_t))
        IOhelper.write_report_line(
            "REPO TOTAL TIME", "{}s".format(scanning_end_t - cloning_start_t))
        IOhelper.write_report_line()
    script_end_t = time.time()
    IOhelper.write_report_line("SCRIPT TOTAL TIME",
                               "{}s".format(script_end_t - script_start_t))
    IOhelper.free_resources()
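`GitHelper` is project-local, but its clone/fetch behaviour maps naturally onto GitPython. A minimal sketch of the pieces `main()` calls; `mine_repo` and the report callbacks belong to the original project and are not reproduced here:

import git  # GitPython


class GitHelper:
    def __init__(self, repo_url):
        self.repo_url = repo_url
        self.repo = None

    def isgitRepo(self, path):
        # a directory counts as a usable clone if GitPython can open it
        try:
            self.repo = git.Repo(path)
            return True
        except (git.InvalidGitRepositoryError, git.NoSuchPathError):
            return False

    def cloneRepo(self, path):
        self.repo = git.Repo.clone_from(self.repo_url, path)

    def fetch_all(self):
        # force-refresh every remote so the scan works on the latest history
        for remote in self.repo.remotes:
            remote.fetch()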
Example #9
# -*- coding: utf-8 -*-

from SchemaHelper import SchemaHelper
from IOHelper import IOHelper
from BulkHelper import BulkHelper
from ChunkJobHelper import ChunkJobHelper
from NormalJobHelper import NormalJobHelper

# directory setup
# please note the data under log/ and output/ will be removed
IOHelper.init()

# retrieve the object schema
schemaHelper = SchemaHelper.getInstance()
object_fields_dict, object_chunkable_dict = schemaHelper.getObjectFieldDict()

# get the record count of each object retrieved
obj_record_count_dict = schemaHelper.getObjectRecordCount(object_fields_dict)

# send retrieve request to Salesforce Bulk API
chunkHelper = ChunkJobHelper.getInstance()
normalHelper = NormalJobHelper.getInstance()
for objectName, fieldList in object_fields_dict.items():
    if object_chunkable_dict[objectName]:
        chunkHelper.createJob(objectName, fieldList)
    else:
        normalHelper.createJob(objectName, fieldList)

# start job monitoring Threads
chunkHelper.startCheckChunkJobStatusThread()
normalHelper.startCheckNormalJobStatusThread()
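Each helper is obtained through `getInstance()` rather than its constructor, i.e. a classic singleton, so the job-monitoring threads and this driver share one set of job lists. A minimal sketch of that pattern as these helpers appear to expose it:

class NormalJobHelper:
    _instance = None

    @classmethod
    def getInstance(cls):
        # create the shared instance on first use, then reuse it
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance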
Example #10
import numpy

from IOHelper import IOHelper
from PushRelabel import PushRelabel  # assumed project-local module path


def build_matrix_map_from_data(data):
    # build the capacity matrix of a flow network: node 0 is the source,
    # nodes 1..len(row) are row nodes, the next len(col) are column nodes,
    # and node n - 1 is the sink
    row = data[0]
    col = data[1]
    n = len(row) + len(col) + 2
    capacity = numpy.zeros((n, n))
    for i in range(len(row)):
        capacity[0][i + 1] = row[i]  # source -> row i, capacity = row sum
        for j in range(len(col)):
            # every row may send at most one unit into each column
            capacity[i + 1][len(row) + j + 1] = 1
    for i in range(len(col)):
        capacity[len(row) + i + 1][n - 1] = col[i]  # column -> sink
    return capacity


if __name__ == '__main__':
    file = "problem2.data"
    io = IOHelper(file)
    data = io.get_next_data()
    while data:
        row = len(data[0])
        col = len(data[1])
        capacity = build_matrix_map_from_data(data)
        pr = PushRelabel(capacity)
        max_flow = pr.push_relabel()
        _, n = max_flow.shape
        # the flow on the row -> column edges is the reconstructed matrix
        mat = max_flow[1:row + 1, row + 1:n - 1]
        print("matrix as follows:")
        print(mat)
        data = io.get_next_data()
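`build_matrix_map_from_data` encodes a matrix-reconstruction problem as a flow network: the source feeds each row node with its row sum, each row connects to every column node with capacity 1, and the column nodes drain into the sink with the column sums. A quick demo reusing the function above; for row sums [2, 1] and column sums [1, 1, 1] it builds a 7x7 capacity matrix:

demo = build_matrix_map_from_data([[2, 1], [1, 1, 1]])
print(demo)
# row 0 holds the source->row capacities, rows 1-2 hold the unit
# row->column edges, and column 6 holds the column->sink capacities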
Example #11
        str(m['av_mdc']),
        str(m['av_deg']),
        str(m['tran']),
        str(m['den']),
        str(m['c_cen']),
        str(m['b_cen']),
        str(mod)
    ]
    io_handler.write_out_line(','.join(line))


filepath = r"/Users/aklyussef/Google Drive/School/Grad/Courses/Semester2/Optimization/Project/collab_network/data/jazz.net"

if len(sys.argv) != 2:
    print('USAGE: {} PATH_TO_NETWORK_FILES'.format(sys.argv[0]))
    exit(1)

network_dir = sys.argv[1]
# Modularity threshold for the class cutoff; this could be moved inside the
# loop to apply different classifications per network
mod_threshold = 0.6

io_h = IOHelper(network_dir)
io_h.writeOutputHeader(
    'filename,edges_node_r,avg_clustering,avg_mdc,avg_degree,transitivity,density,c_centrality,b_centrality,modularity'
)
networks = generate_networks(io_h)
process_generated_networks(networks, io_h)
process_network_files(io_h.get_files_in_dir(network_dir), io_h)

print('script finished!')
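The metrics written out above (average degree, transitivity, density, closeness/betweenness centrality, modularity) map onto networkx. A hedged sketch of how they could be computed; the sample graph is a stand-in for the project's network files:

import networkx as nx
from networkx.algorithms import community

G = nx.karate_club_graph()  # placeholder network

n = G.number_of_nodes()
av_deg = sum(dict(G.degree()).values()) / n
tran = nx.transitivity(G)
den = nx.density(G)
c_cen = sum(nx.closeness_centrality(G).values()) / n
b_cen = sum(nx.betweenness_centrality(G).values()) / n
communities = community.greedy_modularity_communities(G)
mod = community.modularity(G, communities)
print(av_deg, tran, den, c_cen, b_cen, mod)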