def upload_potcar_family(cls, source, group_name, group_description=None, stop_if_existing=True, dry_run=False):
    """
    Upload a set of POTCAR potentials as a family.

    :param source: location handed to ``PotcarWalker``, which is walked to
        collect all POTCAR files to be added.
    :param group_name: the name of the family group; it is passed to
        ``cls._prepare_group_for_upload`` which returns the group to fill.
    :param group_description: a string to be set as the group description
        (also passed to ``cls._prepare_group_for_upload``).
    :param stop_if_existing: forwarded to ``cls._try_upload_potcars``. As
        originally documented: if True, the sha512 of the files is checked
        and an already existing file leads to an error; if False, the
        existing PotcarData node is simply added to the group.
    :param dry_run: If True, do not change the database: the current group
        members are not read and no nodes are added to the group.
    :return: tuple ``(num_files, num_added, num_uploaded)``: the number of
        POTCAR files found, the number of nodes that were not yet in the
        family, and how many of those were newly created.
    """
    group = cls._prepare_group_for_upload(group_name, group_description, dry_run=dry_run)

    potcar_finder = PotcarWalker(source)
    potcar_finder.walk()
    num_files = len(potcar_finder.potcars)

    # A set keeps the membership test below O(1) per potcar; in a dry run the
    # group is not inspected at all, so every potcar counts as "new".
    family_nodes_uuid = set() if dry_run else {node.uuid for node in group.nodes}

    potcars_tried_upload = cls._try_upload_potcars(
        potcar_finder.potcars, stop_if_existing=stop_if_existing, dry_run=dry_run)

    # Keep only the potcars that are not already members of the family.
    new_potcars_added = [
        (potcar, created, file_path)
        for potcar, created, file_path in potcars_tried_upload
        if potcar.uuid not in family_nodes_uuid
    ]

    for potcar, created, file_path in new_potcars_added:
        if created:
            aiidalogger.debug(
                'New PotcarData node %s created while uploading file %s for family %s',
                potcar.uuid, file_path, group_name)
        else:
            aiidalogger.debug(
                'PotcarData node %s used instead of uploading file %s to family %s',
                potcar.uuid, file_path, group_name)

    if not dry_run:
        group.add_nodes([potcar for potcar, _, _ in new_potcars_added])

    num_added = len(new_potcars_added)
    num_uploaded = sum(1 for _, created, _ in new_potcars_added if created)
    return num_files, num_added, num_uploaded
def upload_upf_family(folder, group_label, group_description, stop_if_existing=True):
    """Upload a set of UPF files in a given group.

    :param folder: a path containing all UPF files to be added.
        Only regular files ending in .UPF (case-insensitive) are considered;
        symlinks are resolved to the real file.
    :param group_label: the label of the group to get or create.
    :param group_description: string to be set as the group description.
        Overwrites previous descriptions.
    :param stop_if_existing: if True, raise a ``ValueError`` as soon as a file
        is found whose md5 matches a UpfData node already in the DB. If False,
        simply add the existing UpfData node to the group.
    :return: tuple ``(nfiles, nuploaded)``: the number of UPF files found in
        ``folder`` and the number of nodes that were newly stored.
    :raises ValueError: if ``folder`` is not a directory, or if
        ``stop_if_existing`` is True and a file with a known md5 is found.
    :raises UniquenessError: if the group belongs to a different user, or if
        the resulting family would contain more than one UPF for an element.
    """
    # pylint: disable=too-many-locals,too-many-branches
    import os
    from collections import Counter

    from aiida import orm
    from aiida.common import AIIDA_LOGGER
    from aiida.common.exceptions import UniquenessError
    from aiida.common.files import md5_file

    if not os.path.isdir(folder):
        raise ValueError('folder must be a directory')

    # only files, and only those ending with .upf or .UPF;
    # go to the real file if it is a symlink
    filenames = [
        os.path.realpath(os.path.join(folder, i)) for i in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, i)) and i.lower().endswith('.upf')
    ]

    nfiles = len(filenames)

    automatic_user = orm.User.objects.get_default()
    group, group_created = orm.Group.objects.get_or_create(
        label=group_label, type_string=UPFGROUP_TYPE, user=automatic_user)

    if group.user.email != automatic_user.email:
        raise UniquenessError(
            'There is already a UpfFamily group with label {}'
            ', but it belongs to user {}, therefore you '
            'cannot modify it'.format(group_label, group.user.email))

    # Always update description, even if the group already existed
    group.description = group_description

    # NOTE: GROUP SAVED ONLY AFTER CHECKS OF UNICITY

    pseudo_and_created = []

    for filename in filenames:
        md5sum = md5_file(filename)
        builder = orm.QueryBuilder()
        builder.append(UpfData, filters={'attributes.md5': {'==': md5sum}})
        existing_upf = builder.first()

        if existing_upf is None:
            # return the upfdata instances, not stored
            pseudo, created = UpfData.get_or_create(filename, use_first=True, store_upf=False)
            # to check whether only one upf per element exists
            # NOTE: actually, created has the meaning of "to_be_created"
            pseudo_and_created.append((pseudo, created))
        else:
            if stop_if_existing:
                raise ValueError('A UPF with identical MD5 to '
                                 ' {} cannot be added with stop_if_existing'
                                 ''.format(filename))
            # first() returns the projected row; the node is its only entry
            existing_upf = existing_upf[0]
            pseudo_and_created.append((existing_upf, False))

    # Check whether pseudos are unique per element. A set of (element, md5)
    # pairs discards entries with the same MD5, which would not be stored twice.
    elements = {(pseudo.element, pseudo.md5sum) for pseudo, _ in pseudo_and_created}

    # If the group already exists, check also that the same element is not
    # being inserted more than once (non-pseudo nodes are skipped).
    if not group_created:
        elements.update(
            (aiida_n.element, aiida_n.md5sum) for aiida_n in group.nodes if isinstance(aiida_n, UpfData))

    # Count each element once per distinct MD5; sorted output keeps the error
    # message deterministic.
    element_counts = Counter(element for element, _ in elements)
    duplicates = sorted(element for element, count in element_counts.items() if count > 1)
    if duplicates:
        raise UniquenessError('More than one UPF found for the elements: ' + ', '.join(duplicates) + '.')

    # At this point, save the group, if still unstored
    if group_created:
        group.store()

    # save the upf in the database, and add them to group
    for pseudo, created in pseudo_and_created:
        if created:
            pseudo.store()
            AIIDA_LOGGER.debug('New node %s created for file %s', pseudo.uuid, pseudo.filename)
        else:
            AIIDA_LOGGER.debug('Reusing node %s for file %s', pseudo.uuid, pseudo.filename)

    # Add elements to the group all together
    group.add_nodes([pseudo for pseudo, _ in pseudo_and_created])

    nuploaded = sum(1 for _, created in pseudo_and_created if created)

    return nfiles, nuploaded
def parse_upf(fname, check_filename=True):
    """
    Try to get relevant information from the UPF. For the moment, only the
    version and the element name.

    Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    :param fname: either a path to the UPF file or an open file-like object
        (anything with a ``read`` method). For file-like objects the ``name``
        attribute, if present, is used as the filename.
    :param check_filename: if True, raise a ParsingError exception if the
        filename does not start with the element name (case-insensitive).
    :return: dictionary with the keys ``version`` and ``element``.
    :raises ParsingError: if the element cannot be found or is not a valid
        symbol, if the filename check fails, or if the filename check is
        requested for a file-like object that has no ``name``.
    """
    import io
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.common import AIIDA_LOGGER
    from aiida.orm.nodes.data.structure import _valid_symbols

    parsed_data = {}

    # Only the attribute lookup is guarded: an AttributeError means ``fname``
    # is not file-like and must be a path. Looking up ``name`` separately
    # avoids trying to "open" a handle that merely lacks a name.
    try:
        read = fname.read
    except AttributeError:
        with io.open(fname, encoding='utf8') as handle:
            upf_contents = handle.read()
    else:
        upf_contents = read()
        # Handles opened in binary mode return bytes; the regexes work on text.
        if isinstance(upf_contents, bytes):
            upf_contents = upf_contents.decode('utf8')

        handle_name = getattr(fname, 'name', None)
        if handle_name is not None:
            fname = handle_name
        elif check_filename:
            raise ParsingError('Cannot check the filename of file-like object {!r} because it has no name; '
                               'pass a file path or use check_filename=False'.format(fname))
        else:
            # Only used in log and error messages from here on.
            fname = repr(fname)

    match = REGEX_UPF_VERSION.search(upf_contents)
    if match:
        version = match.group('version')
        AIIDA_LOGGER.debug('Version found: %s for file %s', version, fname)
    else:
        AIIDA_LOGGER.debug('Assuming version 1 for file %s', fname)
        version = '1'

    parsed_data['version'] = version
    try:
        version_major = int(version.partition('.')[0])
    except ValueError:
        # The part before the first dot is not an integer: fall back to
        # version 1
        AIIDA_LOGGER.debug("Falling back to version 1 for file %s, version string '%s' unrecognized", fname, version)
        version_major = 1

    # Version 1 has its own element pattern; all versions > 1 share the v2 one.
    element_regex = REGEX_ELEMENT_V1 if version_major == 1 else REGEX_ELEMENT_V2
    match = element_regex.search(upf_contents)
    element = match.group('element_name') if match else None

    if element is None:
        raise ParsingError('Unable to find the element of UPF {}'.format(fname))
    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError('Unknown element symbol {} for file {}'.format(element, fname))

    if check_filename and not os.path.basename(fname).lower().startswith(element.lower()):
        raise ParsingError('Filename {0} was recognized for element '
                           '{1}, but the filename does not start '
                           'with {1}'.format(fname, element))

    parsed_data['element'] = element

    return parsed_data
def upload_psf_family(folder, group_label, group_description, stop_if_existing=True):
    """
    Upload a set of PSF files in a given group.

    Deprecated: emits an ``AiidaSiestaDeprecationWarning`` on every call.

    :param folder: a path containing all PSF files to be added.
        Only regular files ending in .PSF (case-insensitive) are considered;
        symlinks are resolved to the real file.
    :param group_label: the label of the group to get or create.
    :param group_description: a string to be set as the group description.
        Overwrites previous descriptions, if the group was existing.
    :param stop_if_existing: if True, raise a ``ValueError`` as soon as a file
        is found whose md5 matches a PsfData node already in the DB. If False,
        simply add the existing PsfData node to the group.
    :return: tuple ``(nfiles, nuploaded)``: the number of PSF files found in
        ``folder`` and the number of nodes that were newly stored.
    :raises ValueError: if ``folder`` is not a directory, or if
        ``stop_if_existing`` is True and a file with a known md5 is found.
    :raises UniquenessError: if the group belongs to a different user, or if
        the resulting family would contain more than one PSF for an element.
    """
    import os
    from collections import Counter

    from aiida import orm
    from aiida.common import AIIDA_LOGGER as aiidalogger
    from aiida.common.exceptions import UniquenessError
    from aiida.orm.querybuilder import QueryBuilder
    from aiida_siesta.groups.pseudos import PsfFamily

    message = (  #pylint: disable=invalid-name
        'This function has been deprecated and will be removed in `v2.0.0`. ' +
        '`upload_psf_family` is substitued by `fam.create_from_folder` ' +
        'where `fam` is an instance of the families classes in `aiida_pseudo.groups.family`.'
    )

    warnings.warn(message, AiidaSiestaDeprecationWarning)

    if not os.path.isdir(folder):
        raise ValueError("folder must be a directory")

    # only files, and only those ending with .psf or .PSF;
    # go to the real file if it is a symlink
    files = [
        os.path.realpath(os.path.join(folder, i)) for i in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, i)) and i.lower().endswith('.psf')
    ]

    nfiles = len(files)

    automatic_user = orm.User.objects.get_default()
    group, group_created = PsfFamily.objects.get_or_create(label=group_label, user=automatic_user)

    if group.user.email != automatic_user.email:
        raise UniquenessError(
            "There is already a PsfFamily group with name {}"
            ", but it belongs to user {}, therefore you "
            "cannot modify it".format(group_label, group.user.email)
        )

    # Always update description, even if the group already existed
    group.description = group_description

    # NOTE: GROUP SAVED ONLY AFTER CHECKS OF UNICITY

    pseudo_and_created = []

    for afile in files:
        md5sum = md5_file(afile)
        qb = QueryBuilder()
        qb.append(PsfData, filters={'attributes.md5': {'==': md5sum}})
        existing_psf = qb.first()

        if existing_psf is None:
            # return the psfdata instances, not stored
            pseudo, created = PsfData.get_or_create(afile, use_first=True, store_psf=False)
            # to check whether only one psf per element exists
            # NOTE: actually, created has the meaning of "to_be_created"
            pseudo_and_created.append((pseudo, created))
        else:
            if stop_if_existing:
                raise ValueError(
                    "A PSF with identical MD5 to "
                    " {} cannot be added with stop_if_existing"
                    "".format(afile)
                )
            # first() returns the projected row; the node is its only entry
            existing_psf = existing_psf[0]
            pseudo_and_created.append((existing_psf, False))

    # Check whether pseudos are unique per element. A set of (element, md5)
    # pairs discards entries with the same MD5, which would not be stored twice.
    elements = {(pseudo.element, pseudo.md5sum) for pseudo, _ in pseudo_and_created}

    # If the group already exists, check also that the same element is not
    # being inserted more than once (non-pseudo nodes are skipped).
    if not group_created:
        elements.update(
            (aiida_n.element, aiida_n.md5sum) for aiida_n in group.nodes if isinstance(aiida_n, PsfData)
        )

    # Count each element once per distinct MD5; sorted output keeps the error
    # message deterministic.
    element_counts = Counter(element for element, _ in elements)
    duplicates = sorted(element for element, count in element_counts.items() if count > 1)
    if duplicates:
        raise UniquenessError("More than one PSF found for the elements: " + ", ".join(duplicates) + ".")

    # At this point, save the group, if still unstored
    if group_created:
        group.store()

    # save the psf in the database, and add them to group
    for pseudo, created in pseudo_and_created:
        if created:
            pseudo.store()
            aiidalogger.debug("New node %s created for file %s", pseudo.uuid, pseudo.filename)
        else:
            aiidalogger.debug("Reusing node %s for file %s", pseudo.uuid, pseudo.filename)

    # Add elements to the group all together
    group.add_nodes([pseudo for pseudo, _ in pseudo_and_created])

    nuploaded = sum(1 for _, created in pseudo_and_created if created)

    return nfiles, nuploaded