def _write_structure(atoms, name, all_names, args, geom_patt):
    """Build a Geometry from the collected atoms, write it, and record its name.

    :param atoms: Atom objects collected for the structure being finished
    :param name: text line that preceded the first coordinate line
    :param all_names: names of the structures written so far; the name of
        the structure written here is appended to it
    :param args: parsed arguments; ``directory`` and ``sort`` are read
    :param geom_patt: compiled pattern that recognises a coordinate line
    """
    # NOTE(review): ``name`` can still be None here (a block whose first line
    # is a lone coordinate line), in which case ``search`` raises TypeError.
    # Left unchanged because the right fallback name is a project decision.
    if geom_patt.search(name):
        # The "name" is itself a coordinate line, so these atoms are treated
        # as a continuation of the previous structure and appended to it.
        # NOTE(review): presumably geom.write() produced "<name>.xyz" for the
        # previous structure -- confirm. Raises IndexError if nothing has
        # been written yet.
        geom = Geometry(all_names[-1] + ".xyz")
        geom.atoms.extend(atoms)
    else:
        geom = Geometry(atoms)
        geom.name = name
        geom.comment = name
        if args.directory != "CURRENTDIR":
            geom.name = os.path.join(args.directory, geom.name)
        # Avoid reusing a name: append _002, _003, ... until it is unique.
        base_name = geom.name
        suffix = 2
        while geom.name in all_names:
            geom.name = "{}_{:03d}".format(base_name, suffix)
            suffix += 1

    if args.sort:
        geom.refresh_connected()
        geom.refresh_ranks()
        geom.atoms = geom.reorder()[0]

    geom.write()
    all_names.append(geom.name)


def main(args):
    """Scan a PDF for coordinate tables and write each structure found.

    Every text line of every page element from ``extract_pages(args.infile)``
    is tested against an ``<element symbol> <x> <y> <z>`` pattern. Matching
    lines are accumulated as atoms; the first non-matching line that follows
    (when the previous line was non-empty) finishes the structure, which is
    then written via ``Geometry.write()``. The line seen just before the
    first coordinate line is used as the structure's name and comment.

    :param args: parsed arguments; ``infile``, ``directory`` and ``sort``
        are read
    """
    # Raw strings: "\s" and "\d" are invalid escapes in a normal literal.
    geom_patt = re.compile(r"([A-Z][a-z]*)((?:\s+-?\d+\.?\d*){3})")
    float_patt = re.compile(r"-?\d+\.?\d*")

    all_names = []
    atoms = []
    name = None
    for page_num, page in enumerate(extract_pages(args.infile), start=1):
        print("parsing page {: 4d} please wait...".format(page_num), end="\r")
        for element in page:
            # The previous-line memory does not carry over between elements,
            # but a partially collected structure (atoms/name) does.
            last_line = None
            if not hasattr(element, "get_text"):
                continue
            for line in element:
                text = line.get_text()
                match = geom_patt.search(text)
                if not match and last_line and atoms:
                    # Coordinate block just ended: emit what was collected.
                    _write_structure(atoms, name, all_names, args, geom_patt)
                    atoms = []
                    name = None
                if match:
                    if not name:
                        name = last_line
                    symbol = match.group(1)
                    coords = float_patt.findall(match.group(2))
                    atoms.append(Atom(symbol, [float(c) for c in coords]))
                last_line = text.strip()
    # NOTE(review): atoms still pending here (document ends on a coordinate
    # line) are never written -- confirm whether that is intended.
prev_conf = conf bad_subs = [] print_geom = geom if args.remove_clash: print_geom = Geometry([a.copy() for a in geom]) # print_geom.update_geometry(geom.coordinates.copy()) sub_list = [ Substituent(print_geom.find([at.name for at in sub]), detect=False, end=print_geom.find_exact(sub.end.name)[0]) for sub in substituents ] bad_subs = print_geom.remove_clash(sub_list) # somehow the atoms get out of order print_geom.atoms = sorted(print_geom.atoms, key=lambda a: float(a.name)) if args.skip_clash: if bad_subs: skipped += 1 continue clashing = False for j, a1 in enumerate(print_geom.atoms): for a2 in print_geom.atoms[:j]: if a2 not in a1.connected and a1.is_connected(a2): clashing = True break if clashing: break