Exemple #1
0
    def test_sorting(self):
        """Check that sorted_byarray reorders the other stored arrays too.

        Two arrays are attached to a collection of empty structures:
        'sorter' holds struct_n..1 and 'sorted' holds 1..struct_n. After
        sorting the collection by 'sorter', the 'sorted' array is expected
        to read struct_n..1.
        """
        n_structs = 5

        # Empty Atoms objects are enough: only the arrays are inspected
        collection = AtomsCollection([Atoms() for _ in range(n_structs)])

        descending = np.array(range(n_structs, 0, -1))
        ascending = np.array(range(1, n_structs + 1))
        collection.set_array('sorter', descending)
        collection.set_array('sorted', ascending)

        collection = collection.sorted_byarray('sorter')

        matches = collection.get_array('sorted') == range(n_structs, 0, -1)
        self.assertTrue(np.all(matches))
Exemple #2
0
# NOTE: every print below is a single-argument print(...) call, which behaves
# the same under Python 2 and Python 3. The format strings reproduce the
# spacing that the former multi-item print statements emitted.
print("{0} \n\n".format(aColl.all.info))

# Collections can also be sliced like Numpy arrays for convenience
aColl02 = aColl[0:2]
aColl25 = aColl[2:5]

# Then join them together
aColl05 = aColl02 + aColl25

print("---- Collection slice lengths ---- \n")
print("aColl02 = {0}\taColl25 = {1}\taColl05 = {2}\n\n".format(
    aColl02.length, aColl25.length, aColl05.length))

# Collections can also store "arrays" of data, similarly to Atoms objects in ase
# Each element of such an array is tied to one structure, and the array can
# be used to sort the structures

# Let's use this array to reverse the order of a collection.
# list(...) keeps it a real list on Python 3, where range() is lazy
arr = list(range(10, 0, -1))

aColl.set_array('reversed_range', arr)

aCollSorted = aColl.sorted_byarray('reversed_range')

print("---- Getting an array from a collection ---- \n")
print("Unsorted:  {0} \n".format(aColl.get_array('reversed_range')))
print("Sorted:  {0} \n\n".format(aCollSorted.get_array('reversed_range')))

# And to make sure: the first structure of the original collection should be
# the last one of the sorted collection
print("---- First vs. last elements ---- \n")
print("{0} \n".format(aColl.structures[0].get_positions()))
print(aCollSorted.structures[-1].get_positions())
Exemple #3
0
# Build the AtomsCollection: either merge previously pickled collections, or
# load the structure files directly
if args.pkl:
    aC = AtomsCollection([])
    for pkl_path in args.input_files:
        aC += AtomsCollection.load(pkl_path)
else:
    aC = AtomsCollection(args.input_files,
                         info={'name': args.seedname},
                         cell_reduce=True,
                         progress=True)

# When -n is given, keep only the first n structures after sorting by energy
if args.n is not None:
    # Stores the energies as the 'calc_energy' array used for sorting below
    CalcEnergy.get(aC, store_array=True)
    aC = aC.sorted_byarray('calc_energy')[:args.n]

# NOTE: filtering on the expected number of molecules per cell (args.Z) is
# currently disabled

# Normalisation settings for the genes depend on whether clamping is active
if args.gene_clamp:
    nrange, ndist = (0, 1), 1.0
else:
    nrange, ndist = (None, None), None

# Start with no genes, then set the full gene list in one go
pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist)
pC.set_genes(gene_list, load_arrays=True)

# Optionally save the collection as <savepkl>/<seedname>.pkl
if args.savepkl is not None:
    pkl_name = args.seedname + '.pkl'
    pC.save_collection(os.path.join(args.savepkl, pkl_name))

# And now for the output
if args.genomes:
Exemple #4
0
def __main__():
    """Command line entry point: build a PhylogenCluster and write outputs.

    Steps performed:

    1. Parse the command line arguments.
    2. Load the gene list from ``<seedname>.gene``.
    3. Build an AtomsCollection from the input files (or merge pickled
       collections when ``-pkl`` is given).
    4. If ``-n`` is given, sort by energy and keep the first n structures.
    5. Create a PhylogenCluster and set its genes.
    6. Optionally save the collection (``-savepkl``) and write
       ``<seedname>_genomes.dat`` (``-genomes``),
       ``<seedname>_genomes.png`` (``-gene_png``, only when
       ``_has_matplotlib`` is true) and ``<seedname>_tree.json`` (``-tree``).

    Raises:
        RuntimeError: if ``-genomes`` is requested and some structure has no
            'name' entry in its info dictionary.
    """
    # So, first parse the command line arguments
    parser = ap.ArgumentParser()
    # Main argument
    parser.add_argument('seedname',
                        type=str,
                        default=None,
                        help="Seedname of the job")
    parser.add_argument('input_files',
                        type=str,
                        nargs='+',
                        default=None,
                        help="Files to load and analyse")
    # Optional arguments
    # Boolean args
    parser.add_argument('-pkl',
                        action='store_true',
                        default=False,
                        help="Input is one or more AtomsCollections"
                        " in PICKLE format")
    parser.add_argument('-tree',
                        action='store_true',
                        default=False,
                        help="Create JSON tree file")
    parser.add_argument('-distmat',
                        action='store_true',
                        default=False,
                        help="Save a distance matrix file")
    parser.add_argument('-forcemat',
                        action='store_true',
                        default=False,
                        help="Create JSON distance matrix file for force "
                        "layout")
    parser.add_argument('-genomes',
                        action='store_true',
                        default=False,
                        help="Save genomes text file")
    parser.add_argument('-gene_png',
                        action='store_true',
                        default=False,
                        help="Save genomes png file")
    parser.add_argument('-gene_clamp',
                        action='store_true',
                        default=True,
                        help="Clamp genes between 0 and 1")
    # -gene_clamp is a store_true flag that already defaults to True, so on
    # its own it can never switch clamping off. This counterpart writes False
    # to the same destination, making the unclamped settings reachable.
    parser.add_argument('-no_gene_clamp',
                        dest='gene_clamp',
                        action='store_false',
                        help="Do not clamp genes between 0 and 1")
    # Value args
    parser.add_argument('-n',
                        type=int,
                        default=None,
                        help="Number of lowest energy structures to keep")
    # parser.add_argument('-Z', type=int, default=None,
    # help="Expected numbers of molecules in cell (discard structures "
    # "that don't agree)")
    parser.add_argument('-corrmat_minforce',
                        type=float,
                        default=0.01,
                        help="Correlation matrix minimum force")
    parser.add_argument('-corrmat_maxforce',
                        type=float,
                        default=1.0,
                        help="Correlation matrix maximum force")
    parser.add_argument('-savepkl',
                        type=str,
                        default=None,
                        help="Directory to save collection as pickle file")

    args = parser.parse_args()

    # First, parse the gene list
    gene_list = load_genefile(args.seedname + '.gene')

    # Then create an AtomsCollection
    if not args.pkl:
        aC = AtomsCollection(args.input_files,
                             info={'name': args.seedname},
                             cell_reduce=True,
                             progress=True)
    else:
        # NOTE(review): the help text says these inputs are pickle files;
        # unpickling executes arbitrary code, so only load trusted files
        aC = AtomsCollection([])
        for f in args.input_files:
            aC += AtomsCollection.load(f)

    # Take only n lowest energies if required
    if args.n is not None:
        # Sort by energy
        CalcEnergy.get(aC, store_array=True)
        aC = aC.sorted_byarray('calc_energy')[:args.n]

    #discard_Z = args.Z is not None

    # Calculate genomes
    nrange = (0, 1) if args.gene_clamp else (None, None)
    ndist = 1.0 if args.gene_clamp else None
    pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist)
    pC.set_genes(gene_list, load_arrays=True)

    if args.savepkl is not None:
        # Save the structure
        pC.save_collection(os.path.join(args.savepkl, args.seedname + '.pkl'))

    # And now for the output
    if args.genomes:
        with open(args.seedname + "_genomes.dat", 'w') as ofile:
            genvec, legend = pC.get_genome_vectors()
            # Create a header: one group of tab-separated columns per legend
            # entry, named <entry[0]>_1 ... <entry[0]>_<entry[1]>
            gene_columns = [
                '\t'.join('{0}_{1}'.format(entry[0], i + 1)
                          for i in range(entry[1]))
                for entry in legend
            ]
            ofile.write('Structure\t{0}\n'.format('\t'.join(gene_columns)))
            try:
                # One row per genome vector: structure name, a tab, then the
                # values separated by single spaces
                for i, gen in enumerate(genvec):
                    ofile.write(aC.structures[i].info['name'] + '\t' +
                                ' '.join(str(x) for x in gen) + '\n')
            except KeyError:
                # Someone has no name!
                raise RuntimeError('Not all structures have a name, '
                                   'can\'t print out genomes as table')

    if args.gene_png and _has_matplotlib:
        # PNG is binary data: the file must be opened in binary mode, or the
        # write fails on Python 3 and may be corrupted by newline translation
        # on Windows
        with open(args.seedname + "_genomes.png", 'wb') as ofile:
            genomes2png(aC, pC, ofile)

    if args.tree:
        # The return value is not used here. The call is kept in its original
        # position in case it prepares state that tree2json relies on
        # TODO(review): confirm, and drop the call if it has no side effects
        pC.get_hier_tree()
        with open(args.seedname + "_tree.json", 'w') as ofile:
            json.dump(tree2json(aC, pC), ofile)