Example #1
    def make_drugs_vs_genes_array(self, plot_heatmap=False):

        print('making drug vs genes array')
        dg_arr = np.zeros((len(self.drug_ids.keys()), len(self.gene_ids.keys())))

        tot_count = len(self.drugs)
        for i, (drug, gene) in enumerate(zip(self.drugs, self.genes_with_drugs)):
            progress(i, tot_count)
            if np.isnan(self.max_clinic_stage[i]):
                print(drug, gene)
            dg_arr[self.drug_ids[drug], self.gene_ids[gene]] = 1  # self.max_clinic_stage[i]

        if plot_heatmap:
            fig = plt.figure()
            sns.heatmap(dg_arr)
            fig.suptitle('drug vs gene array', fontsize=14)
            plt.xlabel('gene id', fontsize=12)
            plt.ylabel('drug id', fontsize=12)
            fig.savefig('./drugsVsGenes.png')

        self.drugs_genes_array = dg_arr

        with open('drugs_vs_genes.pickle', 'wb') as f:
            pickle.dump(dg_arr, f, protocol=pickle.HIGHEST_PROTOCOL)
        np.savetxt('drugs_vs_genes.csv', dg_arr, delimiter=',')
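Most of these examples call a bare progress(count, total) helper (sometimes with a status string) that is not shown. A minimal sketch of what such a console progress bar might look like; the name, signature, and formatting here are assumptions, not the helper these projects actually ship:

import sys

def progress(count, total, status=''):
    # Draw a single-line text progress bar that overwrites itself via '\r'.
    bar_len = 40
    filled_len = int(round(bar_len * count / float(total)))
    percents = round(100.0 * count / float(total), 1)
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%% %s\r' % (bar, percents, status))
    sys.stdout.flush()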
Example #2
def main():

    data = {}
    host_list = build_host_list(args)

    print("================================================")
    print("Checking Live Host and Alternate Names -> Shodan")
    print("================================================\n")

    incrementer = 0

    for host in host_list:

        try:
            r = requests.get("https://www.shodan.io/host/%s" % host)

            if r.status_code == 200:
                if host not in data:
                    data[host] = []

                alternates = check_alternate_names(r.text)
                if alternates:
                    for alt in alternates:
                        data[host].append(alt)

        except Exception:
            # Skip hosts that error out, but still count them toward the progress bar below.
            pass

        incrementer += 1
        progress.progress(incrementer, len(host_list))

    with open(args.output, "w") as handle:
        handle.write(json.dumps(data, indent=4))
Example #3
 def handle1(data):
     # data = np.fromstring(data, dt)
     # print(data)
     global prog
     prog += len(data)
     progress(prog)
     f.write(data)
Example #4
def retrieve_export_request(scan_list):

    increment = 0

    print("[*] Save Directory: %s" % PATH)
    print("[*] Formats => " + str(FORMATS))
    print("[*] Downloading Files....")

    for name, id in scan_list.items():

        for format in FORMATS:
            if format == "nessus" or format == "csv":
                export_request = json.loads(
                    requests.post(
                        NESSUS_INSTANCE + "/scans/%s/export" % str(id),
                        headers=HEADERS,
                        verify=False,
                        data=json.dumps({"format": format})).content.decode())
            elif format == "pdf" or format == "html":
                export_request = json.loads(
                    requests.post(NESSUS_INSTANCE +
                                  "/scans/%s/export" % str(id),
                                  headers=HEADERS,
                                  verify=False,
                                  data=json.dumps({
                                      "format":
                                      format,
                                      "chapters":
                                      "vuln_hosts_summary"
                                  })).content.decode())

            export_request_check(name, id, export_request, format)
            ''' Show Progress '''
            increment += 1
            progress.progress(increment, len(FORMATS) * len(scan_list))
Example #5
 def p(self, epoch, mb, NBatches, mul=None):
   pstring = "{:>03d} {:>05d}/{:>05d}, "
   #pstring += "{} since save.  "
   #pstring += "{} since load.  "
   #pstring += "{} loaded, "
   pstring += "{} saved.  "
   #if self.model.loadmomentum:
     #pstring += "Restore momentum."
   #else:
     #pstring += "Discard momentum."
   
   pstring += "  Success Combo?!? -->"
   mul = self.NSaves if mul is None else mul
   den = self.combolength * mul
   if self.combo >= den:
     den = self.combolength
   num = self.combo % den
   if num == 0:
     p = 1.0
   else:
     p = 1.0*num / den
   
   progress.progress(p,
     pstring.format(epoch, mb, NBatches,
                    #self.sincesave,
                    #self.sinceload,
                    #self.failures, 
                    self.saves,
                    ))
Example #6
    def comparison(self, name, func, num_checks, input):
        checked = 0
        out = collections.defaultdict(lambda: collections.defaultdict(list))
        futures = {}
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            # Start the read operations and mark each future with its filepath
            for path, values in input.items():
                for mods in values.values():
                    # don't bother if last round didn't have duplicates
                    if len(mods) <= 1:
                        continue
                    for mod in mods:
                        filepath = os.path.join(self.base, mod, path)
                        futures[executor.submit(func, filepath)] = (mod, path)

            for future in concurrent.futures.as_completed(futures):
                mod, path = futures[future]
                key = future.result()
                out[path][key].append(mod)
                checked += 1
                progress(checked, num_checks, name)

        dupes = 0
        with open('_%s.txt' % name, 'w') as f:
            with open('_%s_conflicts.txt' % name, 'w') as fc:
                for path, values in out.iteritems():
                    for key, mods in values.iteritems():
                        key_str = ','.join([path, str(key)] + mods) + '\n'
                        f.write(key_str)
                        if len(mods) > 1:
                            dupes += len(mods)
                            fc.write(key_str)
        return dupes, out
Example #7
    def add_reactassoc_relations(self, weight=0.1):

        f_in = open(REACTASSOC_DATA, 'r')
        contents = f_in.readlines()
        f_in.close()
        tot_count = len(contents)
        for i, ln in enumerate(contents):
            progress(i, tot_count)
            ln = ln.replace('\n', '')
            primary_gene = ln.split('\t')[0]
            other_genes = ln.split('\t')[1:]
            try:
                prim_gene_id = self.gene_ids[primary_gene]
            except KeyError:
                print('Primary gene not found.. skipping')
                continue
            contrib_from_primary = self.genes_vs_disease_array[prim_gene_id, :]
            for j, other_gene in enumerate(other_genes):
                if j > 2:
                    break
                try:
                    oth_gene_id = self.gene_ids[other_gene]
                except KeyError:
                    continue
                orig_oth_score = self.genes_vs_disease_array[oth_gene_id, :]
                average_score = np.mean([contrib_from_primary, orig_oth_score], axis=0)
                self.genes_vs_disease_array[oth_gene_id, :] = average_score*weight + (1-weight)*orig_oth_score
Example #8
def cve_plugin_search(plugins, inputname=""):

    check_auth()

    plugin_id_list = []
    cve_to_lower()

    cve_data_dict = {}

    print("")
    if CONFIG_MODE == True:
        print(
            bcolors.OKGREEN + "[+]" + bcolors.ENDC +
            " Parsing each plugin for possible match on CVEs for CVE FILE: %s"
            % os.path.basename(inputname))
    else:
        print(bcolors.OKGREEN + "[+]" + bcolors.ENDC +
              " Parsing each plugin for possible match on CVEs")

    for count in range(len(plugins)):
        details = json.loads(
            requests.get("https://%s:8834/plugins/plugin/%d" %
                         (host, int(plugins[count])),
                         headers=headers,
                         verify=False).content)

        for attribute in details['attributes']:
            value = attribute['attribute_value']
            if value.lower() not in cve_list:
                continue

            cve = value.upper()
            plugin = plugins[count]
            cve_data_dict.setdefault(cve, {})
            if plugin in cve_data_dict[cve]:
                continue
            cve_data_dict[cve][plugin] = {"name": details["name"], "hosts": []}

        progress.progress(count, len(plugins) - 1)

    print("\n")

    return json.dumps(cve_data_dict)
Example #9
def naive_bayes(cache_model):
    print("Running Naive-Bayes Classifier Training")

    idx = 0
    total = len(data)
    start_progress("Pre-processing {} of data".format(total))
    for gender, comment in data:
        idx += 1
        word_exist = {}
        word_not_exist = {}

        if gender not in list_of_gender:
            list_of_gender.append(gender)

        for word in word_tokenize(comment):
            word_exist[word] = True
            word_not_exist[word] = False

            if word not in list_of_words:
                list_of_words.append(word)

        for gen in list_of_gender:
            if gen == gender:
                train_data_gender.append((word_exist, gen))
            else:
                train_data_gender.append((word_not_exist, gen))
        progress(idx / total * 100)
    end_progress()
    print("\nFinished pre-processing ({} data)".format(total))

    print("Training {} gender data".format(total))
    main_gender_classifier = NaiveBayesClassifier.train(train_data_gender)

    if cache_model:
        cache.cache_model(main_gender_classifier,
                          "model/gender_classifier_{}.p".format(total))

    print("Cross validation")
    average_accuracy = 0
    size = len(train_data_gender)

    for i in range(1, 9):
        test_set = train_data_gender[round((i - 1) * size / 8):round((i) *
                                                                     size / 8)]
        training_set = train_data_gender[0:round((i - 1) * size / 8)]
        training_set.extend(train_data_gender[round((i) * size / 8):])

        gender_classifier = NaiveBayesClassifier.train(training_set)

        print("Test-{0}: {1:.2%}".format(
            i, classify.accuracy(gender_classifier, test_set)))
        average_accuracy += classify.accuracy(gender_classifier, test_set)
    average_accuracy /= 8

    print("Average accuracy: " + "{0:.2%}\n".format(average_accuracy))

    return main_gender_classifier
Example #10
def calculateHashes(filespaths):
    fileInfos = []
    total = len(filespaths)
    i = 0
    for filepath in filespaths:
        i += 1
        progress.progress(i, total, "Calculating Hashes")
        fileInfos.append({"filepath":filepath,"digest":calculateHash(filepath)})
    return fileInfos
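The calculateHash helper is not part of the example; a plausible chunked-hashing implementation it could stand in for (the hash algorithm and chunk size are assumptions):

import hashlib

def calculateHash(filepath, chunk_size=65536):
    # Hash the file in fixed-size chunks so large files never have to fit in memory.
    digest = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()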
Example #11
def Menu():
    
    choice = input("Which option do you want to use?\n1 = history\n2 = progress\n3 = main\n")

    if choice == "1":
        History()
    elif choice == "2":
        progress()
    else:
        Main()
Example #12
 def playHalf(p1, p2, numG, games):
     t = TicTacToe(p1, p2, verbose=self.VERBOSE)
     result = np.zeros(3)
     for g in range(numG):
         if g % math.ceil(0.05 * numGames) == 0:
             progress(games + g, numGames, status="Playing Games")
         print()
         winner = t.play()
         t.reset()
         result[winner + 1] += 1
     return result
Example #13
def merge(pdfs, course_code):
	print('\n')
	progress(0, 1, status='merging '+course_code)
	from PyPDF2 import PdfFileMerger
	merger = PdfFileMerger()

	for pdf in pdfs:
	    merger.append(pdf)
	merger.write(course_code+'\\'+'merged.pdf')
	progress(1, 1, status='merged '+course_code)
	print('\n')
Example #14
def conv2_mexh_var(patch, scales, dx):
    
    import progress
    import numpy as np
    from conv2_mexh import conv2_mexh

    patch[patch == -9999.0] = 0
    patch[patch == -32767] = 0
    patch[np.isnan(patch)] = 0
    [nrows, ncols] = np.shape(patch)
    nodes = nrows * ncols
    
    #Normalize patch to have unit variance
    patch = patch/np.nanstd(patch)
    
    #initialize the output vectors:
    Vcwt = np.zeros((1,np.size(scales)))
    
    #Determine extent of edge effects at the largest wavelet scale sampled. NaN values will be assigned to the fringe of each C grid in the loop so that the same number of nodes is used at each scale for determining Vcwt:
    fringeEval = np.ceil(4*np.max(scales))
    
    
    #start counter
    k = 0
    for a in scales:
        progress.progress(a,np.max(scales),'Doing long job')
        
        #update counter
       
        
        #Compute the 2D CWT by calling Conv2_mexh function (below)
        C = conv2_mexh(patch,a,dx)
        
        # Mask edge effects with NaN (no data)
        C[(np.arange(0,fringeEval)).astype(int),:] = np.NaN
        C[:,(np.arange(0,fringeEval)).astype(int)] = np.NaN
        C[np.arange((nrows-fringeEval),nrows).astype(int),:] = np.NaN
        C[:,(np.arange(ncols-fringeEval,ncols)).astype(int)] = np.NaN
    
        #find NaNs and replace with 0        
        ind = np.argwhere(np.isnan(C))
        C[np.isnan(C)] = 0
        
        #now calculate the wavelet variance at current scale, using number of real-valued nodes
        Vcwt[0,k] = 1/(2*(nodes - ind.shape[0]))*np.sum(np.sum(C**2,1),0)
        
        #frequency and wavelength vectors
        wave = 2*np.pi*dx*scales/(5/2)**(1/2)
        frq = 1/wave
        k = k +1
    
    return(Vcwt,frq,wave)
Example #15
    def make_results(self):
        lap_start = time.time()

        for i, result in enumerate(self.pool):
            lap_start = progress(i, lap_start, self.input_len, self.start_time,
                                 self.message)
            yield result
        progress(i,
                 lap_start,
                 self.input_len,
                 self.start_time,
                 self.message,
                 done=True)
Example #16
def sync(D,M,DM,idc=True,stub=False):
    for dest in D:

        names = []
        for d in D:
            if d != dest:
                names += list(d.N2A)
        if not names: 
            print "Unchanged: %s" % dest.bin
            continue

        b = fileutil.change_ext(dest.bin, "")
        progress.progress("Sync'ing %s with the others..." % dest.bin)
        if idc: 
            newnames_idc = open("%s-new.idc" % b, "w")
            existingnames_idc = open("%s-existing.idc" % b, "w")
            print >> newnames_idc, idch
            print >> existingnames_idc, idch
        if stub: 
            newnames_s = open("%s-new.S" % b, "w")
            existingnames_s = open("%s-existing.S" % b, "w")

        A = {}
        for k,n in enumerate(sorted(names)):
            progress.progress(float(k) / len(names))
            da = []
            for d in D:
                if n in d.N2A:
                    da.append((d, d.N2A[n]))
            m,s,c,fp = FindBestMatch(da, dest, M, DM)
            if m:
                if m in A:
                    if s > A[m][1]:
                        print "better match, replacing"
                        A[m] = (n,s,c,fp)
                else:
                    A[m] = (n,s,c,fp)
        print "saving..."
        for m,(n,s,c,fp) in A.iteritems():
            #~ print "NSTUB(%10s, %s)%s // %s" % ("0x%X"%m,n," " * (30-len(n)),c)
            if m in dest.A2N:
                #~ print " => already defined as %s" % (dest.A2N[m])
                if stub: print >> existingnames_s, "NSTUB(%10s, %s)%s // [already defined as %s] %s" % ("0x%X"%m,n," " * (30-len(n)), (dest.A2N[m]), c)
                if idc: print >> existingnames_idc, '    MakeName(%10s, "%s");%s // [already defined as %s] %s' % ("0x%X"%m,n," " * (30-len(n)), (dest.A2N[m]), c)
            else:
                if stub: print >> newnames_s, "NSTUB(%10s, %s)%s // %s" % ("0x%X"%m,n," " * (30-len(n)),c)
                if idc: print >> newnames_idc, '    MakeName(%10s, "%s");%s // %s' % ("0x%X"%m,n," " * (30-len(n)),c)
                    
        if idc: 
            print >> newnames_idc, "}"
            print >> existingnames_idc, "}"
Example #17
def download_files(course_links, course_code):
	local_file_path = []
	path = course_code
	total = len(course_links)
	i=0
	print('\n')
	msg='downloading.... '+str(i)+'/'+str(total)+' of '+str(total)+' files'
	i+=1
	
	for url in course_links:
		filename = url.rsplit('/',1)
		filename = filename[1].rsplit('.',1)
		j=0
		while os.path.exists(path+'\\'+filename[0]+'.'+filename[1]):
			filename[0]=filename[0].rsplit('_',1)
			filename[0]=filename[0][0]
			filename[0]+='_'+str(j)
			j+=1
		filename=filename[0]+'.'+filename[1]

		for month in get_range('month'):
			if re.search(month, url, re.IGNORECASE):
				filename =month+'_'+filename
		for year in get_range('year'):
			if re.search(year, url, re.IGNORECASE):
				filename =year+'_'+filename

		pathlib.Path(path).mkdir(parents=True, exist_ok=True)

		http = urllib3.PoolManager()
		try:
		    response = http.request('GET', url, preload_content=False, retries=False, timeout=10.0)
		except urllib3.exceptions.NewConnectionError:
			msg='Connection failed for url: '+url
		except urllib3.exceptions.TimeoutError:
			msg='Connection timed out for url: '+url
		else:
			with open(path+'\\'+filename, 'wb') as out:
				while True:
					data = response.read(100)
					if not data:
						break
					out.write(data)
			local_file_path.append(path+'\\'+filename)
			# Only release the connection when the request actually produced a response.
			response.release_conn()
		msg='downloaded '+str(i)+'/'+str(total)+' of '+str(total)+' files'
		progress(i, total, status=msg)
		i+=1
	return local_file_path
Example #18
 def handle(data):
     global start, expect, prog
     array = np.frombuffer(data, dtype=np.uint8)
     length = array.size
     if start:
         f.write(data)
         prog += length
         progress(prog)
         # if expect >= length:
         #     expect -= length
         # else:
         #     if expect < 0:
         #         i = -expect
         #         expect = 0
         #     else:
         #         i = 0
         #     while expect < length:
         #         while i < 8 and expect < length:
         #             if (i < 4 and array[expect] == 0xfb) or (i >= 4 and array[expect] == 0xff):
         #                 expect += 1
         #                 i += 1
         #             else:
         #                 print('*********error*********')
         #                 f.close()
         #                 sys.exit(1)
         #         if i < 8:
         #             expect = -i
         #             return
         #         else:
         #             expect += 940
         #     expect -= length
     else:
         i = 0
         while i < length:
             if array[i] == 0xfb:
                 expect += 1
                 if expect == 4:
                     start = True
                     if i < 3:
                         head = b'\xfb' * (3 - i)
                         data = head + data
                     else:
                         data = data[i - 3:]
                     expect = 0
                     handle(data)
                     return
             else:
                 expect = 0
             i += 1
Example #19
def send_data():
    ser = serial.Serial('/dev/ttyUSB0', 57600, timeout=10)
    string = '\x40' * 95
    i = 0
    k = 0
    while True:
        i += 1
        s = string + format(i, '05d')
        k += len(s)
        progress(k)
        ser.write(s.encode())
        # if i == 100:
        #     sleep(100)
        sleep(0.1)
    ser.close()
Example #20
    def run(self, start_frame=0, end_frame=None):

        # Check input video exists.
        if not os.path.exists(self.input_video_path):
            raise Exception('Video does not exist!!')

        # Set up output directory.
        self._make_output_directory()

        # Save out frames to output directory.
        if not end_frame:
            end_frame = self.total_frames

        frames_to_get = numpy.arange(start_frame, end_frame, self.frame_step)
        frame_path_list = []

        prog = progress(len(frames_to_get))
        for i, frame in enumerate(frames_to_get):
            prog.update(i)
            video_frame = self.vs.get_frame_no(frame).image()
            width, height = video_frame.size
            new_size = (int(width/self.resize_factor), int(height/self.resize_factor))
            video_frame = video_frame.resize(new_size)
            filename = os.path.join(self.output_path, 'static', 'frame-%06d.jpeg' % frame)
            relative_path = os.path.join('static', 'frame-%06d.jpeg' % frame)
            video_frame.save(filename)
            frame_path_list.append(relative_path)

        frame_to_get_str_list = [str(frame) for frame in frames_to_get]
        prog.end()

        # Make webpage.
        make_page(self.output_path, 'index', frame_path_list, frame_to_get_str_list)
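This example drives an object-style reporter: progress(total), then update(i) per frame and end() when finished. A minimal class with that interface, written as an assumed stand-in rather than the project's own helper:

import sys

class progress(object):
    def __init__(self, total):
        self.total = total

    def update(self, i):
        # i is the zero-based index of the frame just processed.
        sys.stdout.write('\rframe %d of %d' % (i + 1, self.total))
        sys.stdout.flush()

    def end(self):
        sys.stdout.write('\n')
        sys.stdout.flush()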
Example #21
async def _dump_page(campaign_id, filename, pageno, playerid, is_gm):
    global session, pages, done
    response = await session.get(
        'https://app.roll20.net/campaigns/chatarchive/{}/?p={}'.format(
            campaign_id, pageno),
        allow_redirects=False)
    async with response:
        if response.status != 200:
            raise HTTPError(response.status)
        parser = ChatParser(filename, playerid, is_gm)
        async for chunk in response.content.iter_chunked(64 * 1024):
            encoding = response.charset or 'utf-8'
            parser.process(chunk.decode(encoding))
        parser.finalize()
    done = done + 1
    progress(done, pages)
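One way _dump_page might be driven, assuming the caller has already set up the session, pages, and done globals it relies on; this driver is a usage sketch, not part of the original tool:

import asyncio

async def dump_all_pages(campaign_id, filename, playerid, is_gm):
    # Fetch every chat-archive page concurrently; each finished page bumps the
    # shared `done` counter inside _dump_page, which updates the progress display.
    tasks = [_dump_page(campaign_id, filename, pageno, playerid, is_gm)
             for pageno in range(1, pages + 1)]
    await asyncio.gather(*tasks)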
Example #22
def make_chunks(is_verbose=False, show_progress=False):
    """Make chunks of the file passed."""
    src, des = rem('grab')

    # Open the file in binary
    FILE_STREAM = open(src, 'rb')
    rem_read_size('register', 104857600)

    # After registering show progress
    if show_progress:
        progress_bar = progress.progress('chunk')
        progress_bar.start()

    count = 0
    while True:
        read_size = rem_read_size('grab')
        READ_CHUNK = FILE_STREAM.read(read_size)  # 52428800)  # 209715200)
        count += 1

        if not READ_CHUNK:
            break

        if is_verbose:
            print("Copying chunk: {}".format(count))

        copy.copy_chunks(READ_CHUNK, count)

    FILE_STREAM.close()

    # Wait for the progress bar to end (it was only started when show_progress is set)
    if show_progress:
        progress_bar.join()

    return True
Example #23
def send_file():
    ser = serial.Serial('/dev/ttyUSB0', 57600, timeout=10)
    file = open(
        '/home/longzhou/Data/chaoyangxiyuan/2017-09-08/1504850902_left.dat',
        'rb')
    size = 0
    while True:
        data = file.read(100)
        if len(data) == 0:
            break
        ser.write(data)
        size += len(data)
        progress(size)
        sleep(0.1)
    ser.close()
    print("done")
Example #24
def import_upx(args, opts):
    """ 
        Import the contents of a UPX file. Does not import the UPX file itself.
        Passes a list of imported oids.
    """
    if not args:
        raise ShellSyntaxError("No files/dir passed")
    
    oids = []
    newfiles = []
    for arg in args:
        if os.path.isdir(arg):
            print " - Processing upx files in directory %s" % arg
            files = sys_utils.get_files_from_directory(arg)
            p = progress.progress(len(files))
            for f in files:
                uoids, noids = import_upxfile(f)
                oids.extend(uoids)
                newfiles.extend(noids)
                p.tick()
                
        elif os.path.isfile(arg):
            print " - Processing file %s ..."
            uoids, noids = import_upxfile(arg)
            oids.extend(uoids)
            newfiles.extend(noids)
    
        else:
            print " - %s not found" % (arg)
    
    print " - Extracted %d files %d are new" % (len(oids), len(newfiles))
    return oids
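This example, like several of the later Oxide-style ones, expects a tick-style object: construct it with the number of items, then call p.tick() once per item. A minimal stand-in with that interface; an assumption, not the framework's real progress module:

import sys

class progress(object):
    def __init__(self, total):
        self.total = total
        self.count = 0

    def tick(self):
        # Advance by one item and redraw the counter on a single console line.
        self.count += 1
        sys.stdout.write('\r%d / %d processed' % (self.count, self.total))
        sys.stdout.flush()
        if self.count >= self.total:
            sys.stdout.write('\n')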
Example #25
    def multiply_arrays(self, plot_heatmap=True, save_pickle=True, operation='mult'):

        if operation == 'mult':
            drugsVdisease = np.matmul(self.drugs_genes_array, self.genes_vs_disease_array)

            # find number of genes linked in chembl per drug
            num_genes_per_drug = np.sum(self.drugs_genes_array, axis=1)

            # divide by number of genes per drug to compute an average drug-disease
            # score across a range of intermediate genes
            drugsVdisease = drugsVdisease/num_genes_per_drug[:, None]

        elif operation == 'max':
            drugsVdisease = np.zeros((self.drugs_genes_array.shape[0], self.genes_vs_disease_array.shape[1]))
            tot_count = self.drugs_genes_array.shape[0]
            for i, drug_id in enumerate(range(self.drugs_genes_array.shape[0])):
                progress(i, tot_count)
                drug_to_genes = self.drugs_genes_array[drug_id, :]
                for disease_id in range(self.genes_vs_disease_array.shape[1]):
                    genes_to_disease = self.genes_vs_disease_array[:, disease_id]
                    max_score = np.max(genes_to_disease*drug_to_genes)
                    drugsVdisease[drug_id, disease_id] = max_score

        if plot_heatmap:
            fig = plt.figure()
            sns.heatmap(drugsVdisease)
            fig.suptitle('drugs vs diseases array', fontsize=14)
            plt.xlabel('disease id', fontsize=12)
            plt.ylabel('drug id', fontsize=12)
            fig.savefig('./drugsVsDiseases-{}.png'.format(operation))

            fig = plt.figure()
            sns.heatmap(self.drugs_straight_to_diseases_array)
            fig.suptitle('drugs vs diseases array', fontsize=14)
            plt.xlabel('disease id', fontsize=12)
            plt.ylabel('drug id', fontsize=12)
            fig.savefig('./drugsVsDiseases-chembl.png')

        if save_pickle:
            with open('drugs_vs_disease_{}.pickle'.format(operation), 'wb') as f:
                pickle.dump(drugsVdisease, f, protocol=pickle.HIGHEST_PROTOCOL)
            np.savetxt('drugs_vs_disease_{}.csv'.format(operation), drugsVdisease, delimiter=',')

        if operation == 'mult':
            self.drugs_disease_mult_array = drugsVdisease
        elif operation == 'max':
            self.drugs_disease_max_array = drugsVdisease
Example #26
def do(src, des, is_verbose=False, show_progress=False):
    """Copy of src to dst."""
    if is_verbose or show_progress:
        print("{} -> {}".format(src, des))

    # If the filesize is zero then use simple_copy
    if not sep.check_filesize():
        input('Simple copy')
        copy.simple_copy()
        return True

    if show_progress:
        rem_size('register', os.path.getsize(src))

    # Make a temp folder to keep the files
    tmp_dir = os.path.join(os.path.dirname(des), 'cpf_temp')

    if is_verbose:
        print("Making temp directory..")

    os.mkdir(tmp_dir)
    rem_dir('register', tmp_dir)

    # Start breaking into chunks

    if is_verbose:
        print("Making chunks...")

    sep.make_chunks(is_verbose, show_progress)

    if is_verbose:
        print("Combining chunks..")

    if show_progress:
        progress_bar = progress.progress('copy')
        progress_bar.start()

    # Now combine the stuff
    combine.combine_chunks()

    # Remove the folder
    if is_verbose or show_progress:
        if show_progress:
            # A newline is necessary after showing the progress
            print('')
        print('Cleaning up...')

    # Wait for the progress_bar thread to end
    if show_progress:
        progress_bar.join()

    cleanup.pass_names(tmp_dir)
    try:
        rmtree(tmp_dir)
    except Exception:
        pass

    rem('unregister')
    rem_dir('unregister')
Example #27
    def test_tick(self, mock_stdout):
        subject = progress()
        subject.write = MagicMock()

        self.assertEqual(subject.count, 0)
        subject.tick()
        self.assertEqual(subject.count, 1)
        subject.write.assert_called_once_with()
Example #28
def read_prescriptions(Prescriptions, gplookup, chemlookup):
    """ 
    for each line of the prescription file:
      get location and postcode from gplookup table
      get chemical from chemlookup table
      process BNF code into components
      print comma-separated record to output
    """
    f_info = os.stat(Prescriptions)
    size = f_info.st_size

    with open(Prescriptions, "rb") as csvfile:
        preader = csv.reader(csvfile)
        preader.next() # skip header
        pdata.writeheader()
        linecount=0
        for line in preader:
            linecount = linecount + 1
            if linecount % 1000 == 0:
                progress.progress(csvfile.tell(), size)
            o = pdata()
            o.sha = line[0]
            o.pct = line[1]
            o.practice = line[2]
            o.bnfcode = line[3]
            o.bnfname = line[4]
            o.items = line[5]
            o.nic = line[6]
            o.act_cost = line[7]
            o.quantity = line[8]
            o.period = line[9]
            o.year = o.period[0:4]
            o.month = o.period[4:6]
            o.chemical_code = o.bnfcode[0:9]
            o.chemical_name = chemlookup[o.chemical_code]
            o.product = o.bnfcode[9:11]
            o.generic = (o.product == "AA")
            o.equivalent = o.chemical_code + "AA" + o.bnfcode[13:15] + o.bnfcode[13:15]
            (o.xgrid, o.ygrid, o.postcode) = gplookup[o.practice]
            o.writeline()
Example #29
def main(pop_size, graph_file, cross_pb, mut_pb, num_gen, hof_size):
	global TOURNAMENT_SIZE
	TOURNAMENT_SIZE = 20

	global NUM_NODES
	NUM_NODES = int(graph_file)

	global ROOT
	ROOT = random.randint(1, NUM_NODES)

	global CAPACITY
	CAPACITY = random.randint(3, NUM_NODES)

	random_adjacency_matrix()

	# Hall Of Fame is terminology from deap but
	#     I like it so I am giving it credit here
	hof = hall_of_fame.hof(hof_size)

	population = generate_population(pop_size + (pop_size % 2)
					,int(NUM_NODES * (NUM_NODES + 1) / 2))

	fitness_evaluation(population)
	hof.update(population)

	start = time.clock()

	progress.startProgress("Generation Progress")

	# GA Execution
	# Mutation function can be changed from here
	for cur_gen in range(num_gen):
		children = crossover(population, cross_pb)
		population = mutation(children, mut_pb)
		fitness_evaluation(population)
		hof.update(population)
		progress.progress((float(cur_gen) / num_gen) * 100)

	progress.endProgress()

	print hof
	print "Time: ", time.clock() - start
	print "ROOT: ", ROOT
	print "CAPACITY: ", CAPACITY
Example #30
    def test_defaults(self, mock_stdout):
        subject = progress()
        self.assertEqual(subject.count_format, '{:>5}')
        self.assertEqual(subject.render_order, subject.renderable_components())
        self.assertEqual(subject.progress_format, 'Processed: {count}')
        self.assertEqual(subject.stream, stdout)
        self.assertEqual(subject.width, None)

        # Computed from args.
        self.assertEqual(subject.computed_render_order, ['count'])
Example #31
    def test_blank_components(self, mock_stdout):
        subject = progress()
        subject.renderable_components = MagicMock(
            return_value=['foo', 'bar', 'buzz'])

        self.assertEqual(subject.blank_components(), {
            'foo': '',
            'bar': '',
            'buzz': ''
        })
Example #32
 def handle(data):
     global start, expect, i, prog
     array = np.frombuffer(data, dtype=np.uint8)
     length = array.size
     if start:
         f.write(data)
         prog += length
         progress(prog)
         while expect < length:
             while i < 8 and expect < length:
                 if array[expect] == (0xfb if i < 4 else 0xff):
                     expect += 1
                     i += 1
                 else:
                     print('*********error*********')
                     f.close()
                     sys.exit(1)
             if i < 8:
                 expect = 0
                 return
             else:
                 i = 0
                 expect += 500
         expect -= length
     else:
         i = 0
         while i < length:
             if array[i] == 0xfb:
                 expect += 1
                 if expect == 4:
                     start = True
                     expect = 0
                     i += 1
                     if i < length:
                         data = data[i:]
                         i = 4
                         handle(data)
                     i = 4
                     return
             else:
                 expect = 0
             i += 1
Example #33
 def do_import_folder(self):
     fname = str(QtGui.QFileDialog.getExistingDirectory(None, "Import Folder"))
     if not fname: return
     # Get a list of all files to be imported
     flist = []
     for data in os.walk(fname, followlinks = True):
         for f in data[2]:
             flist.append(os.path.join(data[0],f))
     for f in progress(flist, "Importing Files","Stop"):
         status = import_file(f)
         print(status)
Example #34
    def loadFile(self):
        """
        Responsible for the following :

        - Loading desired image
        - Passing the image path to main extraction function
        - show the images in their position
        """
        self.statusbar.showMessage("Loading Image File")
        self.loaded_image, self.loaded_image_format = QtWidgets.QFileDialog.getOpenFileName(
            None, "Load Image File", filter="*.jpg;; *.jpeg")
        self.logger.debug("Image File Loaded")

        # CHECK CONDITIONS
        if self.loaded_image == "":
            self.logger.debug("loading cancelled")
            self.statusbar.showMessage("Loading cancelled")
            pass
        else:
            self.logger.debug("starting extraction of data")

            try:
                self.logger.debug("Progressive Image ..")
                self.jpeg_extracted = pr.progress(self.loaded_image)
                self.progressive = True
            except TypeError:
                self.logger.debug("Loaded image %s is not progressive " %
                                  self.loaded_image)
                self.showMessage("Warning !",
                                 "You need to load a progressive Image",
                                 QtWidgets.QMessageBox.Ok,
                                 QtWidgets.QMessageBox.Warning)
                self.progressive = False
                pass
            if self.progressive:
                self.imageLoaded.setPixmap(
                    QtGui.QPixmap(self.loaded_image).scaled(250, 250))
                self.logger.debug("Loaded Image %s" % self.loaded_image)
                self.statusbar.showMessage("Extracting Image ... ")

                self.logger.debug("saving ")
                pr.save_images(self.jpeg_extracted, "results")
                self.logger.debug("Done")

                self.scrollArea.show()

                for indx, widget in enumerate(
                        sorted(self.photos, key=lambda x: x.objectName())):
                    self.logger.debug("Showing results/out%s.jpg" % indx)
                    widget.setPixmap(
                        QtGui.QPixmap("results/out%s.jpg" % indx).scaled(
                            250, 250))
                self.statusbar.clearMessage()
                self.statusbar.showMessage("Images are saved in results/ ")
Example #35
    def test_render_count(self, mock_stdout):
        subject = progress()
        width = randint(0, 100)
        subject.count = randint(0, 100)

        expected_render = subject.count_format.format(subject.count)
        self.assertEqual(subject.render_count(width), expected_render)

        subject.count_format = '{:>10}'
        expected_render = subject.count_format.format(subject.count)
        self.assertEqual(subject.render_count(width), expected_render)
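Examples #27, #30, #31 and #35 unit-test a counting progress class. A minimal class consistent with what those tests assert, written here purely for illustration (an assumed stand-in, not the code actually under test):

from sys import stdout

class progress(object):
    """Sketch of a counting progress reporter matching the tests above (assumed)."""

    def __init__(self, stream=stdout, width=None,
                 count_format='{:>5}', progress_format='Processed: {count}'):
        self.count = 0
        self.stream = stream
        self.width = width
        self.count_format = count_format
        self.progress_format = progress_format
        self.render_order = self.renderable_components()
        # Only components actually referenced by progress_format get rendered.
        self.computed_render_order = [
            c for c in self.render_order if '{' + c + '}' in self.progress_format]

    def renderable_components(self):
        return ['count']

    def blank_components(self):
        # One empty placeholder per renderable component.
        return {c: '' for c in self.renderable_components()}

    def render_count(self, width):
        return self.count_format.format(self.count)

    def write(self):
        self.stream.write('\r' + self.progress_format.format(
            count=self.render_count(self.width)))
        self.stream.flush()

    def tick(self):
        self.count += 1
        self.write()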
Example #36
def process(mod_name, oid_list, opts=None, force=False):
    """ Calls a module over an oid_list without returning results.
    """
    logger.debug("process %s %s", mod_name, oid_list)
    if not opts: opts = {}
    # Clean up and validate inputs
    mod_type = get_mod_type(mod_name)
    if not mod_type:
        logger.error("Module %s not found", mod_name)
        return False 
    oid_list = cleanup_oid_list(mod_name, oid_list)
    if not options.validate_opts(mod_name, opts):
        logger.error("Failed to validate opts for %s : %s", mod_name, opts)
        return False
    try:
        # Prune analysis that already exists
        new_list = []
        for oid in oid_list:
            if not exists(mod_name, oid, opts) or force:
                new_list.append(oid)
        if len(new_list) == 0:  # Everything was already processed
            return True
        # Process the oid_list        
        if len(new_list) == 1 or not config.multiproc_on or mod_type in ["analyzers"]:
            ret_val = True
            if mod_type in ["extractors", "source"]:
                p = progress.progress(len(new_list))
                for oid in new_list:
                    if not single_call_module(mod_type, mod_name, oid, opts):
                        ret_val = False
                    p.tick()
                return ret_val
            else:
                # Don't keep the return value of analyzers and map_reducers, return False if they return None
                
                if not single_call_module(mod_type, mod_name, new_list, opts):
                    ret_val = False
                return ret_val
            
        else:  # Multiprocessing is on and not an analysis module
            if mod_type in ["extractors", "source"]:
                func = initialized_modules[mod_name].process
            elif mod_type in ["map_reducers"]:
                func = initialized_modules[mod_name].mapper
            else:
                raise otypes.UnrecognizedModule("Attempt to call module not of known type.")
            return mp.multi_map(func, new_list, opts, True)
    except:
        datastore.cleanup()
        raise
Example #37
def write_video(video_url, full_path, filename, chunk_size=4096):
    size = int(requests.head(video_url).headers['Content-Length'])
    size_on_disk = check_if_file_exists(full_path, filename)
    if size_on_disk < size:
        with open(full_path + "/" + filename, 'wb') as fd:
            r = requests.get(video_url, stream=True)
            current_size = 0
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)
                current_size += len(chunk)
                s = progress(current_size, size, filename)
                print(s, end='', flush=True)
            print(s)
    else:
        print("{0} already downloaded, skipping...".format(filename))
Example #38
def single_call_module(type, mod_name, oid_list, opts):
    """ Calls any module type with one oid_list
    """
    if type in ["extractors", "source"]:
        return initialized_modules[mod_name].process(oid_list, opts)
    elif type in ["analyzers"]:
        return initialized_modules[mod_name].results(oid_list, opts)
    elif type in ["map_reducers"]:
        p = progress.progress(len(oid_list))
        jobid = get_cid_from_oid_list(oid_list)
        results = []
        for oid in oid_list:
            results.append( initialized_modules[mod_name].mapper(oid, opts, jobid) )
            p.tick()
        return initialized_modules[mod_name].reducer(results, opts, jobid)
    else:
        raise otypes.UnrecognizedModule("Attempt to call module not in module list")
Example #39
def unarchive(args, opts):
    """ 
        Try to unarchive (unzip and untar); passes a list of unarchived oids
        Syntax: unarchive <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
        
    oids = api.expand_oids(valid)
    unarchived = []
    newfiles = []

    print " - Attempting to unarchive (zip, tar) %d files" % len(oids)
    p = progress.progress(len(oids))
    for oid in oids:
        data = api.get_field(api.source(oid), oid, "data")
        if not data:
            print "Not able to process %s" % (oid)
            p.tick()
            continue
            
        tmp = tmp_file(oid, data)
        if not tmp: continue
        aoids = []
        noids = []
        
        if tarfile.is_tarfile(tmp): # tar
            print " - Unpacking a tar file"
            aoids, noids = import_tarfile(tmp, parent_oid=oid)

        elif zipfile.is_zipfile(tmp): # zip
            print " - Unpacking a zip file"
            aoids, noids = import_zipfile(tmp, parent_oid=oid)
            
        unarchived.extend(aoids)
        newfiles.extend(noids)
        os.remove(tmp)
        p.tick()
        
    if unarchived:
        unarchived.extend(unarchive(unarchived, opts)) # Unpacked children

    print " - Extracted %d files %d are new" % (len(unarchived), len(newfiles))
    return unarchived
Example #40
 def import_directory(self, directory):
     """ Process the local directory calling the local import on each file
     """
     files_list = sys_utils.get_files_from_directory(directory)
     if files_list == None:
         return None, 0
     oids = []
     num_new_files = 0
     p = progress.progress(len(files_list))
     for file_location in files_list:
         oid, new_file = self.import_file(file_location)
         p.tick()
         if oid:
             oids.append(oid)
             if new_file:
                 num_new_files += 1
     oids = list(set(oids)) # assert uniqueness 
     return oids, num_new_files
Example #41
def import_files(files_list):
    if not isinstance(files_list, list):
        logger.error("files must be of type list.")
        return None, 0
    try:
        new_file_count = 0
        oids = []
        p = progress.progress(len(files_list))
        for file_location in files_list:
            oid, new_file = import_file(file_location)
            p.tick()
            if oid:
                oids.append(oid)
                if new_file:
                    new_file_count += 1
    except:
        datastore.cleanup()
        raise

    oids = list(set(oids)) # assert uniqueness 
    return oids, new_file_count
Example #42
def untar(args, opts):
    """ 
        Try to untar items passed, passes a list of untarred oids
        Syntax: untar <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")

    oids = api.expand_oids(valid)
    untarred = []
    newfiles = []

    p = progress.progress(len(oids))
    print " - Attempting to untar %d files" % len(oids)
    for oid in oids:
        src = api.source(oid)
        data = api.get_field(src, oid, "data")
        if not data:
            print "No data found for %s" % (oid)
            p.tick()
            continue   
             
        tmpname = oid + ".tar.tmp"
        tmp = tmp_file(tmpname, data) 
        if not tmp: continue
        if tarfile.is_tarfile(tmp):
            toids, nfiles = import_tarfile(tmp, parent_oid=oid)
            untarred.extend(toids)
            newfiles.extend(nfiles)
            
        os.remove(tmp)
        p.tick()
    
    if untarred:
        untarred.extend(untar(untarred, opts)) # Untar children
        
    print " - %d files extracted, %d files are new" % (len(untarred), len(newfiles))
    return untarred
Example #43
def extrapolate(d):
    idapy.select_dump(d)
    srcfiles = []
    d.SRCFILES = {}
    
    progress("finding source file names...")
    for k,f in enumerate(sorted(d.FUNCS)):
        progress(float(k) / len(d.FUNCS))
        s = sourcefile(d, f, partial=False)
        if s:
            srcfiles.append(s)
    progress("finding source file groups...")
    srcfiles = list(set(srcfiles))
    for k,s in enumerate(srcfiles):
        progress(float(k) / len(srcfiles))
        g = findgrp(s)
        if g is None:
            print "WARNING: source file not found:", s
            continue
        for a in g:
            if a in d.SRCFILES:
                print "ERROR:", d.SRCFILES[a], s
            d.SRCFILES[a] = s
    print "Found source files for %d subs; %d subs reference file name directly" % (len(d.SRCFILES), len(srcfiles))
Example #44
def upx(args, opts):
    """ 
        Try to upx unpack items passed, passes a list of unpacked oids
        Syntax: upx <oid_1> <oid_2> ... <oid_n>
    """
    valid, invalid = api.valid_oids(args)
    if not valid:
        raise ShellSyntaxError("No valid oids found")
        
    oids = api.expand_oids(valid)
    unupx = []
    newfiles = []
    
    p = progress.progress(len(oids))
    print " - Attempting to UPX unpack %d files" % len(oids)
    for oid in oids:
        data = api.get_field(api.source(oid), oid, "data")
        if not data:
            print "No data found for %s" % (oid)
            p.tick()
            continue
        
        meta = api.retrieve("file_meta", oid)
        name = meta["names"].pop()
        tmpname = name + ".unpacked_upx"
        tmp = tmp_file(tmpname, data)
        if not tmp: continue
        if is_upx(tmp):
            uoids, noids = import_upxfile(tmp, parent_oid=oid)
            unupx.extend(uoids)
            newfiles.extend(noids)
            
        os.remove(tmp)
        p.tick()
    
    print " - %d files extracted, %d are new" % (len(unupx), len(newfiles))
    return unupx
Example #45
def test_names():
    """Go through the collection and show possible new names

    Search the cards for sounds or images with file names that look
    like MD5 hashes, rename the files and change the notes.
    """
    test_string = u''
    nids = mw.col.db.list("select id from notes")
    for nid in progress(nids, "Dehashilating", "This is all wrong!"):
        n = mw.col.getNote(nid)
        for (name, value) in n.items():
            rs = re.search(hash_name_pat, value)
            if rs is None:
                continue
            try:
                new_name_ = new_media_name(rs.group(1), rs.group(2), n)
            except ValueError:
                continue
            test_string += u'{0}{1} → {2}\n'.format(
                rs.group(1), rs.group(2),
                new_name_)
    if (test_string):
        showText('These new names will be used:\n' + test_string)
    return test_string
Example #46
optimizer = tf.train.AdamOptimizer(learningRate)
train = optimizer.minimize(cost)

init = tf.initialize_all_variables()

LEARNING_COUNT = 10000
BATCH_SIZE = 100

with tf.Session() as sess:
	sess.run(init)
	bat = batch.Batch(x_train, y_train)

	for i in range(LEARNING_COUNT):
		batch_xs, batch_ys = bat.next_batch(BATCH_SIZE)
		sess.run(train, feed_dict={X:batch_xs, Y:batch_ys, drop_prob:0.6, input_drop_prob:0.7})
		pg.progress(LEARNING_COUNT, i, sess.run(cost, feed_dict={X:batch_xs, Y:batch_ys, drop_prob:0.6, input_drop_prob:0.7}))

	pg.complete()

	correct = tf.equal(tf.argmax(y_test, 1), tf.argmax(hypothesis, 1))
	accuracy = tf.reduce_mean(tf.cast(correct, "float"))
	
	print "accuracy: ", sess.run(accuracy, feed_dict={X:x_test, drop_prob:1., input_drop_prob:1.})

	test_set = np.loadtxt('./data/test.csv', delimiter=',', skiprows= 1)

	test_set = np.reshape(test_set, (len(test_set), 28, 28, 1))
	result = sess.run(hypothesis, feed_dict={X:test_set, drop_prob:1., input_drop_prob:1.})
	result = sess.run(tf.argmax(result, 1))
	with open("result.csv", "w") as f:
		f.write("ImageId,Label\n")
Example #47
def main():
    parser = argparse.ArgumentParser(description="Calculate the statistical mapping of OGD street types (edgecatego) to"
                                                 "OSM street types (highway).")
    parser.add_argument("-H", "--hostname", dest="hostname", required=True, help="Host name or IP Address")
    parser.add_argument("-d", "--database", dest="database", required=True, help="The name of the database")
    parser.add_argument("-t", "--table", dest="table", required=True, help="The database table to read from")
    parser.add_argument("-P", "--primary-key", dest="primary_key", required=True, help="The name of the primary key column")
    parser.add_argument("-u", "--user", dest="user", required=False, help="The database user")
    parser.add_argument("-p", "--password", dest="password", required=False, help="The database password")

    args = parser.parse_args()

    # Try to connect
    try:
        conn = psycopg2.connect(
            host=args.hostname,
            database=args.database,
            user=args.user,
            password=args.password
        )
    except Exception as e:
        print("I am unable to connect to the database (%s)." % e.message)
        sys.exit(1)

    cur = conn.cursor()

    try:
        cur.execute("select %s from %s" % (args.primary_key, args.table))
    except Exception as e:
        print("I can't SELECT (%s)!" % e)

    rows = cur.fetchall()
    total = len(rows)
    processed = 0

    street_type_mapping = {}

    progress.startprogress("Processing all streets")

    for source_street in rows:
        percent = processed / total * 100.0
        progress.progress(round(percent, 0))

        objectid = source_street[0]

        statement = """
select
    s.edgecatego as source_type,
    l.highway as target_type,
    sum(ST_Length(ST_Intersection(ST_Buffer(l.way, 10, 'endcap=flat join=round'), s.geom2))) as length
from planet_osm_line l
    left join """ + args.table + """ s on (
            ST_Intersects(l.way, ST_Envelope(s.geom2)) and
            ST_Intersects(s.geom2, ST_Buffer(l.way, 10, 'endcap=flat join=round'))
        )
    where
        l.highway is not null
        and s.""" + args.primary_key + """ = %s
    group by edgecatego, highway
        """

        try:
            cur.execute(statement, (objectid,))
            results = cur.fetchall()

            for result in results:

                source_type = result[0]
                target_type = result[1]
                length = result[2]

                if not source_type in street_type_mapping:
                    street_type_mapping[source_type] = {}

                if target_type in street_type_mapping[source_type]:
                    street_type_mapping[source_type][target_type] += length
                else:
                    street_type_mapping[source_type][target_type] = length
        except Exception as e:
            print("I can't SELECT (%s)!" % e)
            sys.exit(1)

        processed += 1

    pprint(street_type_mapping)
Example #48
def train_test(epochs, eta, save_weights, save_errors, resume,
               init_name, nonlinearity_name, use_cifar10, batchsize=128):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    if use_cifar10 is True:
        print('Using CIFAR-10')
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    if nonlinearity_name == 'relu':
        f = lasagne.nonlinearities.rectify
    elif nonlinearity_name == 'elu':
        f = lasagne.nonlinearities.elu
    elif nonlinearity_name == 'gelu':
        def gelu(x):
            return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
        f = gelu

    network = build_vgg(input_var, num_classes, f, init_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    # if validate == 'test':
    X_val, y_val = X_test, y_test
    # elif validate:
    #     X_val, y_val = X_train[-5000:], y_train[-5000:]
    #     X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    l2_loss = 5e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'regularizable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    # updates = lasagne.updates.nesterov_momentum(
    #     loss + l2_loss, params, learning_rate=eta)
    updates = lasagne.updates.adam(
        loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        for i in range(epochs-1,-1,-1):
            try:
                with np.load(save_weights+'_'+str(i)+'.npz') as f:
                    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i+1
                print('Restored!', i, start_epoch)
                break
            except:
                pass  # checkpoint for this epoch not found; try an earlier one
        if start_epoch == 0:
            assert False, "could not resume"

    # Finally, launch the training loop.
    print("Starting training...")

    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # eta.set_value(lasagne.utils.floatX(orig_lr * max(0.1 ** (epoch//25), 1e-7)))

        # restoration friendly code
        # drop at half and then at three fourths through training
        if 100 <= epoch < 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        # augmentation is mandatory!
        batches = dataset.augment_minibatches(batches)
        batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        # if validate:
        #     val_loss = 0
        #     val_err = 0
        #     val_batches = len(X_val) // batchsize
        #     for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
        #         loss, err = test_fn(inputs, targets)
        #         val_loss += loss
        #         val_err += err
        # else:
        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  CE loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        print("  Loss:      \t%.6f" % (train_loss+l2_loss))
        if save_errors:
            errors.extend([train_loss, l2_loss])

        # if validate:
        #     val_loss /= val_batches
        #     val_err /= val_batches
        #     print("  validation loss:\t%.6f" % val_loss)
        #     print("  validation error:\t%.2f%%" % (val_err * 100))
        #     if save_errors:
        #         errors.extend([val_loss, val_err])
        # else:
        test_loss /= test_batches
        test_err /= test_batches
        print("  test loss:\t%.6f" % test_loss)
        print("  test error:\t%.2f%%" % (test_err * 100))
        if save_errors:
            errors.extend([test_loss, test_err])

        if epoch % 25 == 0 and epoch > 100:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights+'_'+str(epoch), *lasagne.layers.get_all_param_values(network))

            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors, errors=np.asarray(errors).reshape(epoch+1, -1))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # we dump the network weights to a file
    np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # and the learning curves, if we have been collecting them
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
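The training loop above treats progress.progress as a pass-through wrapper: it takes an iterable plus desc and total keywords and yields the minibatches unchanged while reporting how far along the epoch is. A minimal standalone sketch of such a wrapper (not part of the example; the real progress module in this repository may format its output differently):

import sys

def progress(items, desc='', total=None):
    """Yield every item from `items` while printing 'desc i/total' in place."""
    if total is None:
        total = len(items)
    for i, item in enumerate(items, 1):
        sys.stderr.write('\r%s%d/%d' % (desc, i, total))
        sys.stderr.flush()
        yield item
    sys.stderr.write('\n')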

def main():
    parser = argparse.ArgumentParser(description="Look for streets in the OGD table that are not covered by "
                                                 "OpenStreetMap and write them into another table.")
    parser.add_argument("-H", "--hostname", dest="hostname", required=False, help="Host name or IP Address")
    parser.add_argument("-d", "--database", dest="database", required=True, help="The name of the database")
    parser.add_argument("-r", "--region", dest="region", required=True, help="The region to extract streets for")
    parser.add_argument("-t", "--table", dest="table", required=True, help="The database table to read from")
    parser.add_argument("-P", "--primary-key", dest="primary_key", required=True, help="The name of the primary key column")
    parser.add_argument("-n", "--name-column", dest="name_column", required=True, help="The name column")
    parser.add_argument("-s", "--source-tag", dest="source_tag", required=True, help="The text that should be written into the OSM source tag")
    parser.add_argument("-u", "--user", dest="user", required=False, help="The database user")
    parser.add_argument("-p", "--password", dest="password", required=False, help="The database password")

    args = parser.parse_args()

    show_progress = False

    # Read and parse the street type mapping file; fall back to NULL highway
    # and fixme columns when there is no mapping for the requested region.
    street_mapping_select = "null, null,"

    with open(os.path.join(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))), "street-type-mapping.json")) as data_file:
        data = json.load(data_file)

        if args.region in data:
            region_data = data[args.region]
            street_mapping_select = "case "

            for highway_mapping_key in region_data["highway"]:
                street_mapping_select += "when s.%s='%s' then '%s'\n" % ("edgecatego", highway_mapping_key, region_data["highway"][highway_mapping_key])

            street_mapping_select += "end as highway, "

            street_mapping_select += "case "

            for fixme_mapping_key in region_data["fixme"]:
                street_mapping_select += "when s.%s='%s' then '%s'\n" % ("edgecatego", fixme_mapping_key, region_data["fixme"][fixme_mapping_key])

            street_mapping_select += "end as fixme, "


    # Try to connect
    try:
        conn = psycopg2.connect(
            host=args.hostname,
            database=args.database,
            user=args.user,
            password=args.password
        )
    except Exception as e:
        print("I am unable to connect to the database (%s)." % e)
        sys.exit(1)

    cur = conn.cursor()

    try:
        cur.execute("""
select """ + args.primary_key + """ from """ + args.table + """
where """ + args.primary_key + """ not in (
    select """ + args.primary_key + """ from """ + args.table + """_uncovered
)
        """)
    except Exception as e:
        print("I can't SELECT the not-yet-calculated streets (%s)!" % e)
        sys.exit(1)

    rows = cur.fetchall()
    total = len(rows)
    processed = 0

    statement = """
insert into """ + args.table + """_uncovered
    select objectid, name, highway, fixme, geom, source, round(cast((sum(intersection_length) / ogd_length * 100.0) as numeric), 0) as coverage
    from
        (select
            s.""" + args.primary_key + """ as objectid,
            s.""" + args.name_column + """ as name,
            """ + street_mapping_select + """
            ST_AsEWKT(s.geom2) as geom,
            cast('""" + args.source_tag + """' as text) as source,
            ST_Length(ST_Intersection(l.buffer, s.geom2)) as intersection_length,
            ST_Length(s.geom2) as ogd_length
        from osm_street_buffer l
        right join """ + args.table + """ s on (
            ST_Intersects(l.way, ST_Envelope(s.geom2)))
        where s.""" + args.primary_key + """ = %s
        group by """ + args.primary_key + """, """ + args.name_column + """, s.edgecatego, s.geom2, intersection_length) as subquery
    group by objectid, name, highway, fixme, geom, source, ogd_length;
    """

    if show_progress:
        progress.startprogress("Processing all streets")

    for source_street in rows:
        if show_progress:
            # force float division so the percentage also works under Python 2
            percent = processed * 100.0 / total
            progress.progress(round(percent, 0))

        objectid = source_street[0]

        try:
            cur.execute(statement, (objectid,))
            conn.commit()
        except Exception as e:
            print("I can't INSERT the data (%s)!" % e)
            sys.exit(1)

        processed += 1

    if show_progress:
        progress.endprogress()
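main() above drives a different, percentage-based interface: progress.startprogress(title), progress.progress(percent) and progress.endprogress(). A minimal standalone sketch of that trio, assuming a simple carriage-return console line (the actual module may draw a fancier bar):

import sys

_title = ''

def startprogress(title):
    """Begin a new single-line progress display."""
    global _title
    _title = title
    sys.stdout.write('%s: 0%%' % title)
    sys.stdout.flush()

def progress(percent):
    """Overwrite the line with the current percentage (0-100)."""
    sys.stdout.write('\r%s: %d%%' % (_title, percent))
    sys.stdout.flush()

def endprogress():
    """Finish the line."""
    sys.stdout.write('\r%s: 100%%\n' % _title)
    sys.stdout.flush()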
Example #50
0
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize=64):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast_custom as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
            loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    # updates = lasagne.updates.adam(
    #         loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    update_var_prediction = lasagne.layers.get_output(network, deterministic=True, batch_norm_update_averages=True,
                                                      batch_norm_use_averages=False)
    loss_var_update = lasagne.objectives.categorical_crossentropy(update_var_prediction, target_var)
    loss_var_update = loss_var_update.mean()
    update_var_fn = theano.function([input_var, target_var], loss_var_update)
    test_loss = test_loss.mean()
    test_acc = lasagne.objectives.categorical_accuracy(test_prediction,
                                                       target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    l2_fn = theano.function([], l2_loss)

    with np.load("./wider_07_100.npz") as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []

    val_err = 0
    val_acc = 0
    val_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        val_err += err
        val_acc += acc
    if validate or True:  # HACK: validate on test set, for debugging
        print("  validation loss:\t%.6f" % (val_err / val_batches))
        print("  validation error:\t%.2f%%" % (
            100 - val_acc / val_batches * 100))

    for epoch in range(5):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
                batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
                total=train_batches)
        for inputs, targets in batches:
            train_err += update_var_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_err = 0
            val_acc = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val,
                                                               batchsize,
                                                               shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc
        else:
            # HACK: validate on test set, for debugging
            val_err = 0
            val_acc = 0
            val_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                               batchsize,
                                                               shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc

        # Then we print the results for this epoch:
        print("  training loss:\t%.6f" % (train_err / train_batches))
        l2_err = l2_fn()
        print("  L2 loss:      \t%.6f" % l2_err)
        if save_errors:
            errors.extend([train_err / train_batches, l2_err])
        if validate or True:  # HACK: validate on test set, for debugging
            print("  validation loss:\t%.6f" % (val_err / val_batches))
            print("  validation error:\t%.2f%%" % (
                100 - val_acc / val_batches * 100))
            if save_errors:
                errors.extend([val_err / val_batches,
                               100 - val_acc / val_batches * 100])

        if save_weights and epoch % 20 == 0:
            np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
            print('Saved')

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        test_err += err
        test_acc += acc
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_err / test_batches))
    print("  test error:\t\t%.2f%%" % (
        100 - test_acc / test_batches * 100))
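Note that the five-epoch loop in Example #50 never takes a gradient step: update_var_fn is compiled from get_output(..., deterministic=True, batch_norm_update_averages=True, batch_norm_use_averages=False), so the forward passes only refresh the batch-normalization running averages of the restored network. A small standalone Lasagne/Theano sketch of that trick on a hypothetical toy network:

import numpy as np
import theano
import theano.tensor as T
import lasagne

x = T.matrix('x')
net = lasagne.layers.InputLayer((None, 8), input_var=x)
net = lasagne.layers.batch_norm(lasagne.layers.DenseLayer(net, 16))

# Normalize with the current batch statistics, but fold them into the stored
# running averages; compiling without `updates` leaves all weights untouched.
out = lasagne.layers.get_output(net, deterministic=True,
                                batch_norm_update_averages=True,
                                batch_norm_use_averages=False)
refresh_fn = theano.function([x], out)

for _ in range(10):  # a few forward passes over (here: random) data
    refresh_fn(np.random.randn(32, 8).astype(np.float32))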
Example #51
0
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize):
    # import (deferred until now to make --help faster)
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
            path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
            loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    # use a separate index so the comprehension cannot clobber `i`
                    param_values = [f['arr_%d' % j] for j in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print(i, start_epoch)
                break
            except IOError:
                # no checkpoint for this epoch; keep searching backwards
                pass
        if start_epoch == 0:
            assert False, "could not resume"

    # Finally, launch the training loop.
    print("Starting training...")

    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # shrink learning rate at 50% and 75% into training
        if epochs // 2 <= epoch < epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
                batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
                total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err
        else:
            test_loss = 0
            test_err = 0
            test_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                test_loss += loss
                test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])

        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])
        else:
            test_loss /= test_batches
            test_err /= test_batches
            print("  test loss:\t%.6f" % test_loss)
            print("  test error:\t%.2f%%" % (test_err * 100))
            if save_errors:
                errors.extend([test_loss, test_err])

        if epoch % 1 == 0:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights+'_'+str(epoch), *lasagne.layers.get_all_param_values(network))

            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors, errors=np.asarray(errors).reshape(-1, 4))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
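Both train_test variants shrink the learning rate in two steps, at 50% and 75% of training. A tiny standalone sketch of that schedule, kept separate from the Theano shared-variable plumbing:

def lr_for_epoch(base_lr, epoch, epochs):
    """Return the learning rate for `epoch` under the two-step schedule."""
    if epoch >= epochs * 3 // 4:
        return base_lr * 0.01
    if epoch >= epochs // 2:
        return base_lr * 0.1
    return base_lr

# e.g. with base_lr=0.1 and epochs=300:
#   epochs 0-149 -> 0.1, 150-224 -> 0.01, 225-299 -> 0.001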
Example #52
0
def main(files, module, **extra):
    '''Main driver for a generic file processor
    
    The workflow proceeds as follows.
    
    #. For the root process, call :py:func:`init_root` and update any input files
    #. Test if a subset of files have already been processed and remove these
    #. Broadcast new set of files to be processed to all nodes (if necessary)
    #. Broadcast files already processed to all nodes (if necessary)
    #. ??
    #. Initialize data for each process including root
    #. If no files, finalize and return
    #. Initialize data for each worker process
    #. Process files
    #. Reduce data to root worker
    #. Finalize data on root worker
    
    .. sourcecode:: py

        >>> from core.app import file_processor
        >>> from arachnid.util import crop
        >>> file_processor.main(['stack_01.spi'], crop)
    
    :Parameters:
        
        files : list
                List of filenames, tuple groups or lists of filenames
        module : module
                 Main module containing entry points
        extra : dict
                Unused extra keyword arguments
    '''
    
    progname = os.path.basename(sys.argv[0])
    if progname[:4] == 'ara-': progname = progname[4:]
    if progname[:3] == 'sp-': progname = progname[3:]
    restart_file = os.path.join(os.path.dirname(extra['output']), '.restart.'+progname) if 'output' in extra else None
    if extra['worker_count'] > multiprocessing.cpu_count():
        _logger.warn("Number of workers exceeds number of cores: %d > %d"%(extra['worker_count'], multiprocessing.cpu_count()))
    
    _logger.debug("File processer - begin")
    process, initialize, finalize, reduce_all, init_process, init_root = getattr(module, "process"), getattr(module, "initialize", None), getattr(module, "finalize", None), getattr(module, "reduce_all", None), getattr(module, "init_process", None), getattr(module, "init_root", None)
    monitor=None
    if mpi_utility.is_root(**extra):
        if init_root is not None:
            _logger.debug("Init-root")
            f = init_root(files, extra)
            if f is not None: files = f
        _logger.debug("Test dependencies1: %d"%len(files))
        files, finished = check_dependencies(files, restart_file, **extra)
        extra['finished'] = finished
        _logger.debug("Test dependencies2: %d"%len(files))
    else: extra['finished']=None
    _logger.debug("Start processing1")
    tfiles = mpi_utility.broadcast(files, **extra)
    
    # Why?
    if not mpi_utility.is_root(**extra):
        tfiles = set([os.path.basename(f) for f in tfiles])
        files = [f for f in files if f in tfiles]
    _logger.debug("Start processing2")
    
    extra['finished'] = mpi_utility.broadcast(extra['finished'], **extra)
    if initialize is not None:
        _logger.debug("Init")
        f = initialize(files, extra)
        _logger.debug("Init-2")
        if f is not None: files = f
        #files = mpi_utility.broadcast(files, **extra)
    _logger.debug("Start processing3")
    if len(files) == 0:
        if mpi_utility.is_root(**extra):
            _logger.debug("No files to process")
            if finalize is not None: finalize(files, **extra)
        return
    
    if mpi_utility.is_root(**extra):
        _logger.debug("Setup progress monitor")
        monitor = progress(len(files))
    
    if restart_file is not None: tracing.backup(restart_file)
    restart_fout = open(restart_file, 'w') if restart_file is not None else None
    if restart_fout is not None:
        for f in extra['finished']:  # broadcast copy; the local `finished` is only bound on the root rank
            fileid = spider_utility.spider_id(f) if spider_utility.is_spider_filename(f) else f
            restart_fout.write(str(fileid)+'\n')
    current = 0
    _logger.debug("Start processing")
    ignored_errors=[0]
    for index, filename in mpi_utility.mpi_reduce(process, files, init_process=init_process, ignored_errors=ignored_errors, **extra):
        if mpi_utility.is_root(**extra):
            try:
                monitor.update()
                if reduce_all is not None:
                    current += 1
                    try:
                        filename = reduce_all(filename, file_index=index, file_count=len(files), file_completed=current, **extra)
                    except:
                        ignored_errors[0]+=1
                        if _logger.getEffectiveLevel()==logging.DEBUG or 1 == 1:
                            _logger.exception("Reduce to root failed")
                        else:
                            _logger.warn("Reduce to root failed - report this problem to the developer")
                    if isinstance(filename, tuple):
                        filename, msg = filename
                    else: msg=filename
                    _logger.info("Finished: %d,%d - Time left: %s - %s"%(current, len(files), monitor.time_remaining(True), str(msg)))
                else:
                    _logger.info("Finished: %d,%d - Time left: %s"%(current, len(files), monitor.time_remaining(True)))
            except:
                _logger.exception("Error in root process")
                del files[:]
            else:
                if restart_fout is not None:
                    if spider_utility.is_spider_filename(filename): filename=spider_utility.spider_id(filename)
                    restart_fout.write(str(filename)+'\n')
                    restart_fout.flush()
    if ignored_errors[0] > 0:
        see_also="\n\nSee .%s.crash_report for more details"%os.path.basename(sys.argv[0])
        _logger.warn("Errors occurred during run"+see_also)
    if restart_fout is not None: restart_fout.close()
    if len(files) == 0:
        raise ValueError("Error in root process")
    if mpi_utility.is_root(**extra):
        if finalize is not None: finalize(files, **extra)
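Example #52 only relies on the progress monitor exposing a constructor taking the number of files, update(), and time_remaining(True) returning a printable string. A minimal standalone sketch of a class with that interface (the real implementation in the arachnid codebase may track timing differently):

import time

class progress(object):
    """Standalone sketch of the monitor interface used above (hypothetical)."""

    def __init__(self, total):
        self.total = total
        self.done = 0
        self.start = time.time()

    def update(self):
        """Mark one more file as finished."""
        self.done += 1

    def time_remaining(self, as_string=False):
        """Estimate the remaining time from the average time per finished file."""
        if self.done == 0:
            return 'unknown' if as_string else float('inf')
        per_item = (time.time() - self.start) / self.done
        remaining = per_item * (self.total - self.done)
        if as_string:
            return '%dm %02ds' % divmod(int(remaining), 60)
        return remaining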
Example #53
0
def dehashilate():
    """Go through the collection and clean up MD5-ish names

    Search the cards for sounds or images with file names that
    look like MD5 hashes, rename the files and change the notes.

    """
    mdir = mw.col.media.dir()
    new_names_dict = {}
    rename_exec_list = []
    bad_mv_text = u''
    mw.checkpoint(_("Dehashilate"))
    nids = mw.col.db.list("select id from notes")
    for nid in progress(nids, "Dehashilating", "This is all wrong!"):
        n = mw.col.getNote(nid)
        for (name, value) in n.items():
            for match in re.findall(hash_name_pat, value):
                rs = re.search(hash_name_pat, value)
                if rs is None:
                    # Should be redundant with the for match ...:
                    # loop. RAS 2012-06-23
                    continue
                old_name = '{0}{1}'.format(rs.group(1), rs.group(2))
                try:
                    new_name = new_names_dict[old_name]
                except KeyError:
                    try:
                        new_name = new_media_name(rs.group(1), rs.group(2), n)
                    except ValueError:
                        continue
                    do_rename = True
                else:
                    do_rename = False
                if do_rename:
                    src = os.path.join(mdir, old_name)
                    dst = os.path.join(mdir, new_name)
                    try:
                        os.rename(src, dst)
                    except OSError:
                        # print u'Problem moving {0} → {1}\n'.format(src, dst)
                        bad_mv_text += u'{0} → {1}\n'.format(src, dst)
                    else:
                        new_names_dict[old_name] = new_name
                    n[name] = value.replace(old_name, new_name)
                    n.flush()
                    rename_exec_list.append(dict(nid=nid,
                                                 flds=n.joinedFields()))
    mw.col.db.executemany("update notes set flds =:flds where id =:nid",
                          rename_exec_list)
    # This is a bit of voodoo code. Without it the cards weren't
    # synced. Maybe this helps. (Cribbed from anki.find, but don't
    # keep extra list of nids.) RAS 2012-06-20
    # And it doesn't work. RAS 2012-07-13

    # """File
    # "/home/roland/Anki-tests/addons/dehashilator/dehashilator.py",
    # line 268, in dehashilate
    # mw.col.updateFieldCache([re_dict[nids] for re_dict in
    # rename_exec_list])
    # TypeError: unhashable type: 'list'"""
    # mw.col.updateFieldCache([re_dict[nids] for re_dict in rename_exec_list])
    mw.reset()
    if bad_mv_text:
        showText(_(u'These files weren’t renamed:\n') + bad_mv_text)
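hash_name_pat is defined outside this excerpt; the code above only requires that group 1 captures the MD5-ish stem and group 2 the extension (including the dot). A hypothetical pattern with that two-group structure, for illustration only:

import re

# Hypothetical: a 32-character hex stem followed by a common media extension.
hash_name_pat = re.compile(
    r'([0-9a-f]{32})(\.(?:jpg|jpeg|png|gif|mp3|ogg|wav))', re.IGNORECASE)

# e.g. re.search(hash_name_pat, 'd41d8cd98f00b204e9800998ecf8427e.jpg').groups()
# -> ('d41d8cd98f00b204e9800998ecf8427e', '.jpg')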