コード例 #1
0
ファイル: parsePDF.py プロジェクト: QChASM/AaronTools.py
def _write_geometry(atoms, title, all_names, args, geom_patt):
    """Write one parsed structure and return the name it was saved under.

    :param atoms: list of Atom objects collected for this structure
    :param title: text of the line that preceded the first coordinate line
        (may be None or empty when no such line was seen)
    :param all_names: names of the structures written so far; only read here
    :param args: parsed command-line options; ``directory`` and ``sort``
        are used
    :param geom_patt: compiled pattern that recognises a coordinate line
    :returns: ``geom.name`` of the structure that was written
    """
    title_is_coords = bool(title) and geom_patt.search(title) is not None
    if title_is_coords and all_names:
        # the "title" is itself a coordinate line, so this block continues
        # the previous structure (e.g. a table split across text boxes):
        # re-open the file written for it and append the new atoms
        geom = Geometry(all_names[-1] + ".xyz")
        geom.atoms.extend(atoms)
    else:
        if not title or title_is_coords:
            # no usable title (missing, or a continuation with nothing to
            # continue) -- fall back to a placeholder instead of crashing
            title = "unnamed"
        geom = Geometry(atoms)
        geom.name = title
        geom.comment = title
        if args.directory != "CURRENTDIR":
            geom.name = os.path.join(args.directory, geom.name)
        # make the name unique among the structures already written
        base_name = geom.name
        suffix = 2
        while geom.name in all_names:
            geom.name = "{}_{:03d}".format(base_name, suffix)
            suffix += 1
    if args.sort:
        geom.refresh_connected()
        geom.refresh_ranks()
        geom.atoms = geom.reorder()[0]
    geom.write()
    return geom.name


def main(args):
    """Extract coordinate blocks from a PDF and write each as a structure.

    Every text line of ``args.infile`` is scanned for an element symbol
    followed by three numbers. Consecutive matching lines are collected into
    one structure, which is named after the line preceding the first match
    and written out when the next non-matching line is reached. Atoms still
    pending when the document ends are written as well.

    :param args: parsed command-line options; ``infile``, ``directory`` and
        ``sort`` are used
    """
    # raw strings: "\s" / "\d" in a normal literal are invalid escapes
    geom_patt = re.compile(r"([A-Z][a-z]*)((?:\s+-?\d+\.?\d*){3})")
    float_patt = re.compile(r"-?\d+\.?\d*")

    all_names = []
    atoms = []
    name = None
    for page_num, page in enumerate(extract_pages(args.infile), start=1):
        print("parsing page {: 4d} please wait...".format(page_num), end="\r")
        for element in page:
            if not hasattr(element, "get_text"):
                continue
            # the candidate title is tracked per text element; atoms and
            # name deliberately carry over so a block can span elements
            last_line = None
            for line in element:
                text = line.get_text()
                match = geom_patt.search(text)
                if not match and last_line and atoms:
                    # first non-coordinate line after a block: flush it
                    all_names.append(
                        _write_geometry(atoms, name, all_names, args, geom_patt)
                    )
                    atoms = []
                    name = None
                if match:
                    if not name:
                        name = last_line
                    symbol = match.group(1)
                    coords = float_patt.findall(match.group(2))
                    atoms.append(Atom(symbol, [float(c) for c in coords]))
                last_line = text.strip()

    if atoms:
        # the document ended on a coordinate line; do not drop that block
        all_names.append(
            _write_geometry(atoms, name, all_names, args, geom_patt)
        )
コード例 #2
0
        # NOTE(review): this is the interior of a loop whose header is not
        # visible here; `conf`, `geom`, `substituents` and `skipped` come from
        # the enclosing scope.
        # keep the current `conf` around under a second name
        prev_conf = conf

        bad_subs = []
        # by default the geometry that gets checked below is `geom` itself
        print_geom = geom
        if args.remove_clash:
            # work on a Geometry built from copies of the atoms rather than
            # on `geom` directly
            print_geom = Geometry([a.copy() for a in geom])
            # print_geom.update_geometry(geom.coordinates.copy())
            # rebuild each substituent from the copied atoms, looking the
            # atoms up by name in print_geom
            sub_list = [
                Substituent(print_geom.find([at.name for at in sub]),
                            detect=False,
                            end=print_geom.find_exact(sub.end.name)[0])
                for sub in substituents
            ]
            # presumably the substituents whose clashes could not be
            # resolved -- confirm against Geometry.remove_clash
            bad_subs = print_geom.remove_clash(sub_list)
            # somehow the atoms get out of order
            # restore the order by sorting on the numeric value of each name
            print_geom.atoms = sorted(print_geom.atoms,
                                      key=lambda a: float(a.name))

        if args.skip_clash:
            # anything remove_clash reported counts as skipped
            if bad_subs:
                skipped += 1
                continue

            # check every pair of atoms once (a2 always precedes a1): a pair
            # is flagged when a2 is not listed in a1.connected and yet
            # a1.is_connected(a2) is true
            clashing = False
            for j, a1 in enumerate(print_geom.atoms):
                for a2 in print_geom.atoms[:j]:
                    if a2 not in a1.connected and a1.is_connected(a2):
                        clashing = True
                        break

                # leave the outer loop too once a clash has been found
                if clashing:
                    break