def test_sorting(self):
    """Check that sorted_byarray() reorders the other stored arrays too.

    A collection of empty structures gets two arrays: 'sorter', which
    counts down from struct_n to 1, and 'sorted', which counts up from 1
    to struct_n. After sorting the collection by 'sorter', the 'sorted'
    array is expected to come back counting down.
    """
    struct_n = 5
    descending = list(range(struct_n, 0, -1))
    ascending = list(range(1, struct_n + 1))

    # Generate a few (empty) structures
    testcoll = AtomsCollection([Atoms() for _ in range(struct_n)])
    testcoll.set_array('sorter', np.array(descending))
    testcoll.set_array('sorted', np.array(ascending))

    testcoll = testcoll.sorted_byarray('sorter')
    reordered = testcoll.get_array('sorted')

    self.assertTrue(np.all(reordered == descending))
# NOTE: every print below is a single-argument call, so it produces exactly
# the same output under Python 2 (where the parentheses merely group the
# expression) and Python 3 (where print is a function). The spaces that the
# Python 2 print statement inserted between comma-separated items are
# written explicitly inside the format strings.
print("{0} \n\n".format(aColl.all.info))

# Collections can also be sliced like Numpy arrays for convenience
aColl02 = aColl[0:2]
aColl25 = aColl[2:5]
# Then join them together
aColl05 = aColl02 + aColl25

print("---- Collection slice lengths ---- \n")
print("aColl02 = {0}\taColl25 = {1}\taColl05 = {2}\n\n".format(
    aColl02.length, aColl25.length, aColl05.length))

# Collections can also store "arrays" of data, similarly to Atoms objects
# in ase. These arrays' elements are tied each to one structure, and can be
# used to sort them.
# list() keeps this a real list on Python 3, where range() is lazy.
arr = list(range(10, 0, -1))
# Let's use this array to reverse the order of a collection
aColl.set_array('reversed_range', arr)
aCollSorted = aColl.sorted_byarray('reversed_range')

print("---- Getting an array from a collection ---- \n")
print("Unsorted:  {0} \n".format(aColl.get_array('reversed_range')))
print("Sorted:  {0} \n\n".format(aCollSorted.get_array('reversed_range')))

# And to make sure
print("---- First vs. last elements ---- \n")
print("{0} \n".format(aColl.structures[0].get_positions()))
print(aCollSorted.structures[-1].get_positions())
# Then create an AtomsCollection if not args.pkl: aC = AtomsCollection(args.input_files, info={'name': args.seedname}, cell_reduce=True, progress=True) else: aC = AtomsCollection([]) for f in args.input_files: aC += AtomsCollection.load(f) # Take only n lowest energies if required if (args.n is not None): # Sort by energy CalcEnergy.get(aC, store_array=True) aC = aC.sorted_byarray('calc_energy')[:args.n] #discard_Z = args.Z is not None # Calculate genomes nrange = (0, 1) if args.gene_clamp else (None, None) ndist = 1.0 if args.gene_clamp else None pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist) pC.set_genes(gene_list, load_arrays=True) if args.savepkl is not None: # Save the structure pC.save_collection(os.path.join(args.savepkl, args.seedname + '.pkl')) # And now for the output if args.genomes:
def __main__():
    """Command-line entry point: cluster structures and write the outputs.

    Steps performed by the code below:

    1. Parse the command line (positional ``seedname`` and ``input_files``
       plus the optional flags defined here).
    2. Read the gene list from ``<seedname>.gene``.
    3. Build an AtomsCollection, either directly from the input files or,
       with ``-pkl``, by concatenating previously saved collections.
    4. With ``-n``, sort the collection by its 'calc_energy' array and keep
       the first ``n`` structures.
    5. Create a PhylogenCluster, set the genes on it and, on request, save
       the collection (``-savepkl``), the genomes table (``-genomes``), the
       genomes picture (``-gene_png``) and the JSON tree (``-tree``).

    Raises:
        RuntimeError: if ``-genomes`` is requested and some structure has
            no 'name' entry in its ``info`` dictionary.
    """

    # So, first parse the command line arguments
    parser = ap.ArgumentParser()
    # Main arguments
    parser.add_argument('seedname', type=str, default=None,
                        help="Seedname of the job")
    parser.add_argument('input_files', type=str, nargs='+', default=None,
                        help="Files to load and analyse")
    # Optional arguments
    # Boolean args
    parser.add_argument('-pkl', action='store_true', default=False,
                        help="Input is one or more AtomsCollections"
                        " in PICKLE format")
    parser.add_argument('-tree', action='store_true', default=False,
                        help="Create JSON tree file")
    parser.add_argument('-distmat', action='store_true', default=False,
                        help="Save a distance matrix file")
    parser.add_argument('-forcemat', action='store_true', default=False,
                        help="Create JSON distance matrix file for force "
                        "layout")
    parser.add_argument('-genomes', action='store_true', default=False,
                        help="Save genomes text file")
    parser.add_argument('-gene_png', action='store_true', default=False,
                        help="Save genomes png file")
    parser.add_argument('-gene_clamp', action='store_true', default=True,
                        help="Clamp genes between 0 and 1")
    # '-gene_clamp' is a store_true flag that already defaults to True, so on
    # its own it can never be switched off. This opt-out writes to the same
    # destination and makes the unclamped code path below reachable.
    # NOTE(review): the unclamped settings (norm_range=(None, None),
    # norm_dist=None) were unreachable before - confirm PhylogenCluster
    # accepts them.
    parser.add_argument('-no_gene_clamp', dest='gene_clamp',
                        action='store_false',
                        help="Do not clamp genes between 0 and 1")
    # Value args
    parser.add_argument('-n', type=int, default=None,
                        help="Number of lowest energy structures to keep")
    # parser.add_argument('-Z', type=int, default=None,
    #                     help="Expected numbers of molecules in cell "
    #                     "(discard structures that don't agree)")
    parser.add_argument('-corrmat_minforce', type=float, default=0.01,
                        help="Correlation matrix minimum force")
    parser.add_argument('-corrmat_maxforce', type=float, default=1.0,
                        help="Correlation matrix maximum force")
    parser.add_argument('-savepkl', type=str, default=None,
                        help="Directory to save collection as pickle file")

    args = parser.parse_args()
    # NOTE(review): -distmat, -forcemat and the two -corrmat_* values are
    # parsed above but not consumed anywhere in this part of the function.

    # First, parse the gene list
    gene_list = load_genefile(args.seedname + '.gene')

    # Then create an AtomsCollection
    if not args.pkl:
        aC = AtomsCollection(args.input_files,
                             info={'name': args.seedname},
                             cell_reduce=True, progress=True)
    else:
        # Each input is a saved collection: concatenate them all
        aC = AtomsCollection([])
        for f in args.input_files:
            aC += AtomsCollection.load(f)

    # Take only n lowest energies if required
    if args.n is not None:
        # Sort by energy
        CalcEnergy.get(aC, store_array=True)
        aC = aC.sorted_byarray('calc_energy')[:args.n]

    # discard_Z = args.Z is not None

    # Calculate genomes
    nrange = (0, 1) if args.gene_clamp else (None, None)
    ndist = 1.0 if args.gene_clamp else None
    pC = PhylogenCluster(aC, genes=[], norm_range=nrange, norm_dist=ndist)
    pC.set_genes(gene_list, load_arrays=True)

    if args.savepkl is not None:
        # Save the structure
        pC.save_collection(os.path.join(args.savepkl,
                                        args.seedname + '.pkl'))

    # And now for the output
    if args.genomes:
        genvec, legend = pC.get_genome_vectors()
        # Create a header: one '<label>_<k>' column per legend entry, with k
        # running from 1 to the entry's second element
        header = ('Structure\t'
                  '{0}\n').format('\t'.join([
                      '\t'.join(['{0}_{1}'.format(entry[0], i + 1)
                                 for i in range(entry[1])])
                      for entry in legend]))
        # Build every row before touching the disk, so that a structure
        # without a name cannot leave a half-written table behind.
        # NOTE(review): the header is tab-separated while the gene values in
        # each row are space-separated; kept as in the original.
        try:
            rows = [aC.structures[i].info['name'] + '\t' +
                    ' '.join([str(x) for x in gen]) + '\n'
                    for i, gen in enumerate(genvec)]
        except KeyError:
            # Someone has no name!
            raise RuntimeError('Not all structures have a name, '
                               'can\'t print out genomes as table')
        with open(args.seedname + "_genomes.dat", 'w') as ofile:
            ofile.write(header)
            ofile.writelines(rows)

    # The picture is silently skipped when matplotlib is unavailable
    if args.gene_png and _has_matplotlib:
        # PNG is binary data: the file must be opened in binary mode
        # (text mode breaks on Python 3 and corrupts the image on Windows)
        with open(args.seedname + "_genomes.png", 'wb') as ofile:
            genomes2png(aC, pC, ofile)

    if args.tree:
        # NOTE(review): the returned tree is not used here; the call is kept
        # in case tree2json relies on it having been made - confirm.
        tree = pC.get_hier_tree()
        with open(args.seedname + "_tree.json", 'w') as ofile:
            json.dump(tree2json(aC, pC), ofile)