def predict(self, images, indices=None, batch_size=3):
    """Predict a boolean label mask for the selected images.

    The selected images are fed through ``self.session`` in batches and the
    values of ``self.pred_labels`` are gathered into a single array.

    :param images: array indexed as ``[count, h, w, ...]``; ``shape[1]`` and
        ``shape[2]`` are used as height and width of the output.
    :param indices: optional iterable of indices/slices which is combined
        with ``np.r_``. All images are used when ``None``.
    :param batch_size: number of images fed per ``session.run`` call.
    :return: boolean array of shape ``(len(indices), h, w)``.
    """
    # for some reason batch size 3 runs faster than larger batch sizes
    if indices is None:
        indices = range(len(images))
    else:
        indices = np.r_[tuple(indices)]

    # images shape: [count, h, w, rgb=3]; use h and w from image
    # The builtin ``bool`` replaces the ``np.bool`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24.
    pred = np.empty((len(indices), images.shape[1], images.shape[2]),
                    dtype=bool)

    for i, batch in enumerate(chunks(indices, batch_size)):
        start = i * batch_size
        pred[start:start + batch_size] = self.session.run(
            self.pred_labels, {self.images: images[batch]})
    return pred
def train(self, images, labels, indices=None, epochs=None, batch_size=None):
    """Train the model on the selected images and write summaries.

    For every epoch the indices are shuffled, split into batches and each
    batch is run through ``self.optimizer``. The summary of every batch is
    written with a ``tf.summary.FileWriter`` to ``self.log_path``.

    :param images: indexable collection of input images.
    :param labels: indexable collection of target labels, aligned with
        ``images``.
    :param indices: optional iterable of indices/slices which is combined
        with ``np.r_``. All images are used when ``None``.
    :param epochs: number of epochs; defaults to ``self.epochs``.
    :param batch_size: images per batch; defaults to ``self.batch_size``.
    """
    if indices is None:
        indices = list(range(len(images)))
    else:
        indices = np.r_[tuple(indices)]
    if epochs is None:
        epochs = self.epochs
    if batch_size is None:
        batch_size = self.batch_size

    # Use the effective batch size (not self.batch_size), otherwise the
    # progress output shows a wrong total when batch_size is overridden.
    num_batches = ceil(len(indices) / batch_size)

    writer = tf.summary.FileWriter(self.log_path, self.graph)
    try:
        for epoch in range(1, epochs + 1):
            random.shuffle(indices)
            batches = chunks(indices, batch_size)
            start = time.time()
            for i, frames in enumerate(batches, 1):
                summary, _ = self.session.run(
                    [self.summary, self.optimizer],
                    {
                        self.images: images[frames],
                        self.target_labels: labels[frames]
                    })
                writer.add_summary(
                    summary, tf.train.global_step(self.session, self.step))
                # Progress is rewritten in place after every batch ("\r").
                print("Epoch {}: batch {} of {} | {:.3f}s".format(
                    epoch, i, num_batches, time.time() - start), end="\r")
            # Finish the progress line of this epoch.
            print()
    finally:
        # Close the writer even if a training step raises.
        writer.close()
def test(self, images, expected, indices=None, batch_size=3):
    """Predict the selected images and score them against ``expected``.

    Runs ``self.pred_labels`` and ``self.confusion_matrix`` batch-wise,
    sums the 2x2 confusion matrices of all batches and derives accuracy,
    precision, recall and F1 score from the sum. The metrics are printed
    and returned.

    :param images: array indexed as ``[count, h, w, ...]``; ``shape[1]`` and
        ``shape[2]`` are used as height and width of the prediction.
    :param expected: target labels, aligned with ``images``.
    :param indices: optional iterable of indices/slices which is combined
        with ``np.r_``. All images are used when ``None``.
    :param batch_size: number of images fed per ``session.run`` call.
    :return: tuple ``(pred, metrics)``; ``pred`` is a boolean array of shape
        ``(len(indices), h, w)`` and ``metrics`` a dict with the keys
        ``accuracy``, ``precision``, ``recall``, ``f1`` and
        ``confusion_matrix``.
    """
    # for some reason batch size 3 runs faster than larger batch sizes
    if indices is None:
        # Was ``len(input)``: that referenced the builtin ``input`` function
        # and raised a TypeError whenever no indices were given.
        indices = range(len(images))
    else:
        indices = np.r_[tuple(indices)]

    # images shape: [count, h, w, rgb=3]; use h and w from image
    # Builtin ``bool``/``int`` replace the ``np.bool``/``np.int`` aliases,
    # which were removed in NumPy 1.24.
    pred = np.empty((len(indices), images.shape[1], images.shape[2]),
                    dtype=bool)
    conf_mat = np.zeros([2, 2], dtype=int)

    for i, batch in enumerate(chunks(indices, batch_size)):
        start = i * batch_size
        pred[start:start + batch_size], cf = self.session.run(
            [self.pred_labels, self.confusion_matrix],
            {
                self.images: images[batch],
                self.target_labels: expected[batch]
            })
        conf_mat += cf

    accuracy = conf_mat.diagonal().sum() / conf_mat.sum()
    # NOTE(review): presumably rows are the true labels and columns the
    # predictions - confirm against the definition of self.confusion_matrix.
    precision = conf_mat[1, 1] / conf_mat[:, 1].sum()
    recall = conf_mat[1, 1] / conf_mat[1].sum()
    # The harmonic mean is only computed when both values are non-zero.
    f1 = 0 if precision == 0 or recall == 0 else hmean((precision, recall))

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 score:", f1)
    print("Confusion matrix")
    print(conf_mat)

    return pred, {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
        "confusion_matrix": conf_mat
    }
def set_free_variables(self, occupancynumber, fvalue='0.5'):
    """
    Inserts additional free variables according to the occ parameter
    This function starts at the end of parse_dsr_line() so we don't have
    to care about it anywhere else.

    :param occupancynumber: string, like '21.0'
    :type occupancynumber: string
    :param fvalue: value that is used for every newly appended free variable
    :type fvalue: string
    :return: list of the rewritten 'FVAR ...' lines, empty list if there
             are no free variables to write
    """
    fvar_list = self.get_fvarlist()  # free variables in the res file
    varlen = self.get_fvar_count()
    occupancynumber = occupancynumber.strip('-')
    # the occupancynumber is split in the fvar part and the occupancy part:
    num = occupancynumber.split('.')
    fvar = int(num[0]) // 10  # e.g. 20.5 is fvar 2 and occupancy 0.5
    if fvar == 0:
        fvar = 1
    difference = fvar - varlen
    if fvar > 1:
        if difference > 0:
            # if fvars are missing, add new ones up to the requested number:
            fvar_list.extend([fvalue] * difference)
        elif fvar - 1 < len(fvar_list):
            # The former check (len - (fvar - 1) >= 0) still allowed
            # fvar - 1 == len(fvar_list), which is one past the end.
            # make sure the occupancy of the disorder parts does not get < 0:
            fvar_value = fvar_list[fvar - 1]
            if (float(occupancynumber) - (10 * int(fvar) + float(fvar_value))) < 0:
                fvar_list[fvar - 1] = '0.5'
    fvar_list = [str(x) for x in fvar_list]
    if not fvar_list:
        # Nothing to write; leave the res file untouched instead of failing.
        return []
    for line in self.fvarlines:
        self.remove_line(line, remove=True)  # removes the old FVAR
    # up to seven values per FVAR line:
    fvars = ['FVAR ' + ' '.join(i) for i in misc.chunks(fvar_list, 7)]
    # all new FVAR lines are stored at the position of the first old one:
    self._reslist[self.fvarlines[0]] = ' \n'.join(fvars) + '\n'
    return fvars
def createPyramids(TopLevelQKeys, options):
    """Create the tile pyramid for all levels below the maximum level.

    Walks from ``options.MaxLevel - 1`` down to ``options.CGLLevel``. For
    every level all quadkeys below each top level quadkey are collected,
    split into chunks and handed to ``createLevelTileAndSubDeltasChunk`` in a
    process pool. Finally ``to8bit`` is called for every top level quadkey.

    :param TopLevelQKeys: sequence whose items carry the quadkey string at
        index 0.
    :param options: object providing ``CGLLevel``, ``MaxLevel``,
        ``ProcessingThreads`` and ``Basepath``.
    """
    print("Generating pyramids")
    minlevel = options.CGLLevel
    maxlevel = options.MaxLevel - 1

    for level in range(maxlevel, minlevel - 1, -1):
        subqkeys = []
        for entry in TopLevelQKeys:
            topkey = entry[0]
            # Pad with '0' to get the first quadkey of this level ...
            qkey = topkey
            while len(qkey) < level:
                qkey = qkey + '0'
            # ... and step through all quadkeys sharing the top level prefix.
            while qkey.startswith(topkey):
                subqkeys.append(qkey)
                qkey = QuadKeyIncrement(qkey)

        if not subqkeys:
            # Without tiles the thread count below would drop to 0 and
            # mp.Pool(0) raises a ValueError.
            continue

        threads = options.ProcessingThreads
        # Aim for about four chunks per worker, but at least one tile each.
        tilesPerPart = max(1, len(subqkeys) // (4 * threads))
        if len(subqkeys) / tilesPerPart < threads:
            # Fewer chunks than workers: do not start idle processes.
            threads = int(len(subqkeys) / tilesPerPart)
        subchunks = chunks(subqkeys, tilesPerPart)

        pool = mp.Pool(threads)
        print("Processing level " + str(level).rjust(2, '0') + ", Threads " +
              str(threads).rjust(2, '0') + ", TPP " +
              str(tilesPerPart).rjust(2, '0') + ", Total tiles: " +
              str(len(subqkeys)))
        # NOTE: no error_callback is given, so exceptions raised inside a
        # worker are not reported here.
        for idx, chunk in enumerate(subchunks):
            pool.apply_async(createLevelTileAndSubDeltasChunk,
                             args=(idx, chunk, options.Basepath),
                             callback=collect_result)
        pool.close()
        pool.join()

    for entry in TopLevelQKeys:
        to8bit(entry[0], options.Basepath)
# Determine the top level quadkeys of the requested area and write the
# helper files used to check the coverage.
TopLevelQKeys = CoordsToQkeyList(longlatUL, longlatLR, Options)
createCoveragePolyShapefile(TopLevelQKeys, Options.Basepath)
createGMVisualizationScript(Options)

diskspaceneeded = calculateMaxDiskUsageMB(len(TopLevelQKeys), Options)
print("Maximum space needed will be around " + str(int(diskspaceneeded)) +
      " MB")
print("If padding tiles are not \"full\", it will be less")
print("Showing quadkeys over source data in global mapper")
print(
    "When padding enabled, outermost tiles don't have to be completely covered"
)
print("1/3 of width/height should be enough")
print("Check if coverage is OK")
visualize(Options.GMExePath, Options.Basepath)

# Everything below only runs after the user confirmed the coverage.
if click.confirm('Everything OK? Continue?', default=True):
    allSubQKeys = ListAllSubQKeys(TopLevelQKeys, Options.MaxLevel,
                                  Options.Basepath)
    totalSubCount = len(allSubQKeys)
    # Two chunks per Global Mapper thread. At least one quadkey per chunk,
    # otherwise small areas would lead to a chunk size of 0.
    subsPerChunk = max(1, totalSubCount // (Options.GMThreads * 2))
    subQKeyChunks = chunks(allSubQKeys, subsPerChunk)
    for index, chunk in enumerate(subQKeyChunks):
        CreateGlobalMapperScript(index, chunk, Options)
    RunGMScripts(Options.GMExePath, Options.Basepath, Options.GMThreads)
    createPyramids(TopLevelQKeys, Options)
    createCGLs(TopLevelQKeys, Options.Basepath, Options.ProcessingThreads)
    makePackageFolder(TopLevelQKeys, Options, manifest)
    print("Done!")
    print("Ready to fly folder " + Options.TargetName + " is at")
    print(Options.Basepath)
    print("Just copy to Community to check if it works")
def main(self):
    """
    main object to run DSR as command line program

    Parses the DSR command from the res file list, fetches the fragment
    from the database (self.gdb), fits the fragment to the target atoms
    with rmsd.fit_fragment() and inserts the fitted atoms in front of the
    HKLF line and the restraints after the UNIT line of the res list.
    The result is written with self.rl.write_resfile().

    The program exits via sys.exit() if the base file name or the res list
    is empty and after a CF3/CF6/CF9 fragment was handled.
    """
    dbatoms = []
    # The database content:
    import atomhandling
    basefilename = filename_wo_ending(self.res_file)
    if not basefilename:
        print('*** Illegal option ***')
        sys.exit()
    if len(self.reslist) == 0:
        print("*** The input file is empty. Can not proceed! ***")
        sys.exit()
    find_atoms = atomhandling.FindAtoms(self.reslist)
    rle = ResListEdit(self.reslist, find_atoms)
    dsrp = DSRParser(self.reslist)
    self.fragment = dsrp.fragment
    restraints = self.gdb.get_restraints(self.fragment)  # this is only executed once
    db_residue_string = self.gdb.get_resi(self.fragment)
    dbatoms = self.gdb.get_atoms(self.fragment, self.invert)  # only the atoms of the dbentry as list
    # the atomtypes of the dbentry as list e.g. ['C', 'N', ...]
    db_atom_types = atomhandling.get_atomtypes(dbatoms)
    sf = atomhandling.SfacTable(self.reslist, db_atom_types)
    sfac_table = sf.set_sfac_table()  # from now on this sfac table is set
    resi = Resi(dsrp, db_residue_string, find_atoms)
    # line where the dsr command is found in the resfile:
    if dsrp.cf3_active:
        # CF3, CF6 and CF9 groups are handled by the CF3 class, the program
        # ends afterwards.
        from cf3fit import CF3
        cf3 = CF3(rle, find_atoms, self.reslist, self.fragment, sfac_table,
                  basefilename, dsrp, resi, self.res_file, self.options)
        if self.fragment == 'cf3':
            cf3.cf3(afix='130')
        if self.fragment == 'cf6':
            cf3.cf3(afix='120')
        if self.fragment == 'cf9':
            cf3.cf9()
        print('\nFinished...')
        sys.exit()
    # checks have to be after CF3, CF6 etc.
    self.gdb.check_consistency(self.fragment)
    self.gdb.check_db_atom_consistency(self.fragment)
    self.gdb.check_db_restraints_consistency(self.fragment)
    self.gdb.check_sadi_consistence(self.fragment)
    if dsrp.occupancy:
        rle.set_free_variables(dsrp.occupancy)
    restraints = remove_resi(restraints)
    # corrects the atom type according to the previous defined global sfac table:
    dbatoms = atomhandling.set_final_db_sfac_types(db_atom_types, dbatoms, sfac_table)
    if not dsrp.unit_line:
        # NOTE(review): this only prints the message, the method continues
        # and dsrp.unit_line is used as list index further down - confirm
        # that this is intended.
        print('*** No UNIT instruction in res file found! Can not proceed! ***')
    print('Inserting {} into res File.'.format(self.fragment))
    if self.invert:
        print('Fragment inverted.')
    print('Source atoms: {}'.format(', '.join(dsrp.source)))
    print('Target atoms: {}'.format(', '.join(dsrp.target)))
    shx = ShelxlRefine(self.reslist, basefilename, find_atoms, self.options)
    shx.backup_shx_file()
    # several checks if the atoms in the dsr command line are consistent
    atomhandling.check_source_target(dsrp.source, dsrp.target, dbatoms)
    num = atomhandling.NumberScheme(self.reslist, dbatoms, dsrp)
    # returns also the atom names if residue is active
    fragment_numberscheme = num.get_fragment_number_scheme()
    print('Fragment atom names: {}'.format(', '.join(fragment_numberscheme)))
    dfix_head = ''
    if dsrp.dfix:
        # The restraints from the database are replaced by generated
        # DFIX (1,2 and 1,3 distances) and FLAT restraints:
        restr = Restraints(self.fragment, self.gdb)
        dfix_12 = restr.get_formated_12_dfixes()
        dfix_13 = restr.get_formated_13_dfixes()
        flats = restr.get_formated_flats()
        restraints = dfix_12 + dfix_13 + flats
    # ##########Not using SHELXL for fragment fit: ###########
    print("--- Using fast fragment fit ---")
    if self.options.target_coords:
        # coordinates from the options are grouped into triples:
        target_coords = chunks(self.options.target_coords, 3)
    else:
        # {'C1': ['1.123', '0.7456', '3.245']}
        target_coordinates = find_atoms.get_atomcoordinates(dsrp.target)
        target_coords = [target_coordinates[key] for key in dsrp.target]
    # Uppercase is important here to avoid KeyErrors in source_atoms generation
    atnames = self.gdb.get_atomnames(self.fragment, uppercase=True)
    source_atoms = dict(zip(atnames, self.gdb.get_coordinates(self.fragment, cartesian=True, invert=self.invert)))
    # Coordinates only from the source, not the entire fragment:
    source_coords = [source_atoms[x] for x in dsrp.source]
    target_coords = [frac_to_cart(x, rle.get_cell()) for x in target_coords]
    from rmsd import fit_fragment
    # The source and target atom coordinates are fitted first. Then the complete fragment
    # is rotated and translated to the target position as calculated before.
    # parameter cartesian has to be false here:
    fragment_coords = self.gdb.get_coordinates(self.fragment, cartesian=False, invert=self.invert)
    fitted_fragment, rmsd = fit_fragment(fragment_coords,
                                         source_atoms=source_coords,
                                         target_atoms=target_coords)
    # Moving back to the position of the first atom to have a reference:
    import numpy as np
    from rmsd import centroid
    # I have to make sure that I use the centroid of the correct atoms from target and source,
    # otherwise the fragment is shifted to a wrong position.
    # The third atom from the fragment e.g. has to be the third from the fragment to get
    # the correct centroid:
    center_difference = centroid(np.array(target_coords)) - \
                        centroid(np.array([list(fitted_fragment)[atnames.index(dsrp.source[x])]
                                           for x in range(len(source_coords))]))
    # finishing shift to correct centroid:
    fitted_fragment += center_difference
    # Or even lower than 0.1?
    if rmsd < 0.1:
        print('Fragment fit successful with RMSD of: {:8.3}'.format(rmsd))
    else:
        print('*** Fragment fit might have failed with RMSD of: {:8.3} ***'.format(rmsd))
    fitted_fragment = [cart_to_frac(x, rle.get_cell()) for x in fitted_fragment]
    # Build one atom line per fragment atom:
    afix_entry = []
    e2s = Elem_2_Sfac(sfac_table)
    for at, coord, atype in zip(fragment_numberscheme, fitted_fragment, db_atom_types):
        sfac_num = str(e2s.elem_2_sfac(atype))
        if dsrp.occupancy:
            occ = float(dsrp.occupancy)
        else:
            # occupancy that is used if none was given in the dsr command
            occ = 11.0
        afix_entry.append(isoatomstr.format(at, sfac_num, coord[0], coord[1], coord[2], occ, 0.03))
    afix_entry = "\n".join(afix_entry)
    new_atomnames = list(reversed(fragment_numberscheme))
    same_resi = ''
    if not dsrp.resiflag:
        restraints = rename_restraints_atoms(new_atomnames, self.gdb.get_atomnames(self.fragment), restraints)
    else:
        restraints = resi.format_restraints(restraints)
        # SADI\n
        same_resi = ["SAME_{} {} > {}\n".format(resi.get_residue_class, new_atomnames[-1], new_atomnames[0])]
    # Adds a "SAME_resiclass firstatom > lastatom" to the afix:
    if not self.options.rigid_group:
        restraints += same_resi
        # if dsrp.resiflag: # <- Or should I do this?
        restraints += ["SIMU 0.04 0.08 1"]
    # NOTE(review): the bare name `options` is used here and below while the
    # rest of the method uses `self.options` - verify that a module level
    # `options` exists in every context this method is called from.
    if not options.external_restr:
        restraints = remove_duplicate_restraints(self.reslist, restraints, resi.get_residue_class)
    restraints = wrap_headlines(restraints)
    dfx_file_name = ''
    # The atom list is wrapped into PART, RESI and AFIX 9 instructions as
    # requested:
    if dsrp.part:
        afix_entry = "PART {} {}\n".format(dsrp.part, dsrp.occupancy) + afix_entry + "\nPART 0"
    if dsrp.resiflag:
        afix_entry = 'RESI {} {}\n{}\nRESI 0'.format(resi.get_residue_class, resi.get_resinumber, afix_entry)
    if self.options.rigid_group:
        afix_entry = 'AFIX 9\n' + afix_entry
    if options.external_restr and not self.rigid:
        # The restraints are written to an external .dfix file and only a
        # "+filename" reference is inserted into the res file:
        pname, ext = os.path.splitext(basefilename + '.dfix')
        if dsrp.dfix:
            dfx_file_name = pname + "_dfx" + ext
        else:
            dfx_file_name = pname + ext
        dfx_file_name = write_dbhead_to_file(dsrp, dfx_file_name, restraints, resi.get_residue_class,
                                             resi.get_resinumber)
        if dsrp.resiflag:
            restraints = 'REM Restraints for residue {}:\n+{}\n' \
                .format(resi.get_residue_class, dfx_file_name)
        else:
            restraints = 'REM Restraints for DSR fragment:\n+{}\n' \
                .format(dfx_file_name)
    if self.options.rigid_group:
        afix_entry += '\nAFIX 0\n'
    # Adds the origin of restraints and fragment to res file:
    import textwrap
    source = textwrap.wrap("REM Restraints for Fragment {}, {} from: {}. "
                           "Please cite https://doi.org/10.1107/S1600576718004508".format(
        self.fragment,
        self.gdb.get_fragment_name(self.fragment),
        self.gdb.get_src(self.fragment)),
        width=74, subsequent_indent='REM ')
    source = '\n'.join(source) + '\n'
    # check if restraints already inserted:
    for line in self.reslist:
        try:
            # the fifth word of the REM line from above is "<fragment>,"
            if line.split()[4] == self.fragment + ',':
                source = ''
                break
        except IndexError:
            # lines with less than five words
            continue
    # + 'AFIX 0\n' before hklf seems to be not needed after shelx-2013:
    self.reslist[dsrp.hklf_line - 1] = self.reslist[dsrp.hklf_line - 1] + afix_entry + '\n'
    if not self.rigid:
        self.reslist[dsrp.unit_line] = self.reslist[dsrp.unit_line] + source + ''.join(restraints)
    # write to file:
    self.rl.write_resfile(self.reslist, '.res')
    if dsrp.command == 'REPLACE':
        # The res file that was just written is read again and written a
        # second time after replace_after_fit():
        print("Replace mode active\n")
        self.rl = ResList(self.res_file)
        reslist = self.rl.get_res_list()
        self.reslist, find_atoms = atomhandling.replace_after_fit(self.rl, reslist, resi,
                                                                  fragment_numberscheme, rle.get_cell())
        self.rl.write_resfile(self.reslist, '.res')
    os.remove(shx.backup_file)