Example #1
    def save(self):
        # Randomly generate a password.
        username, email, password = (self.cleaned_data['username'],
                                     self.cleaned_data['email'],
                                     rand_string(10))

        new_user = UserenaSignup.objects.create_inactive_user(
            username, email, password, send_email=False)

        # Send the activation email. Include the generated password.
        userena_signup_obj = UserenaSignup.objects.get(user__username=username)
        send_activation_email_with_password(userena_signup_obj, password)

        return new_user
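
The rand_string helper used here (and again in the examples below) is not shown in these snippets. A minimal sketch of what such a helper might look like, assuming it just returns a random alphanumeric string of the requested length:

import random
import string

def rand_string(num_chars):
    # Hypothetical stand-in for the project's rand_string() helper:
    # build a random lowercase-alphanumeric string of the given length.
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(num_chars))

Note that the random module is not cryptographically secure; if the generated string is used as a real password (as in the save() method above), the secrets module on Python 3.6+ would be the safer choice.
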
Example #2
def annotations_file_to_python(annoFile, source, expecting_labels):
    """
    Takes: an annotations file, the source it belongs to, and whether
    each line is expected to include a label code (expecting_labels).

    Returns: a tuple of (annotation_dict, annotation_dict_id), where
    annotation_dict is a shelved dictionary of the Pythonized
    annotations, like this:
    {'Shore1;Reef3;...;2008': [{'row':'695', 'col':'802', 'label':'POR'},
                               {'row':'284', 'col':'1002', 'label':'ALG'},
                               ...],
     'Shore2;Reef5;...;2009': [...]
     ... }
    and annotation_dict_id identifies the shelf file on disk.

    Checks for: correct file formatting, i.e. that all expected
    words/tokens are present on each line (raises a FileContentError
    otherwise).
    """

    # We'll assume annoFile is an InMemoryUploadedFile, as opposed to a filename of a temp-disk-storage file.
    # If we encounter a case where we have a filename, use the below:
    #annoFile = open(annoFile, 'r')

    # Format args: line number, line contents, error message
    file_error_format_str = str_consts.ANNOTATION_FILE_FULL_ERROR_MESSAGE_FMTSTR

    numOfKeys = source.num_of_keys()
    uniqueLabelCodes = []

    # The order of the words/tokens is encoded here.  If the order ever
    # changes, we should only have to change this part.
    words_format_without_label = ['value'+str(i) for i in range(1, numOfKeys+1)]
    words_format_without_label += ['date', 'row', 'col']
    words_format_with_label = words_format_without_label + ['label']

    num_words_with_label = len(words_format_with_label)
    num_words_without_label = len(words_format_without_label)

    # The annotation dict needs to be kept on disk temporarily until all the
    # Ajax upload requests are done. Thus, we'll use Python's shelve module
    # to make a persistent dict.
    if not os.access(settings.SHELVED_ANNOTATIONS_DIR, os.R_OK | os.W_OK):
        # Don't catch this error and display it to the user.
        # Just let it become a server error to be e-mailed to the admins.
        raise DirectoryAccessError(
            "The SHELVED_ANNOTATIONS_DIR either does not exist, is not readable, or is not writable. Please rectify this."
        )
    annotation_dict_id = rand_string(10)
    annotation_dict = shelve.open(os.path.join(
        settings.SHELVED_ANNOTATIONS_DIR,
        'source{source_id}_{dict_id}'.format(
            source_id=source.id,
            dict_id=annotation_dict_id,
        ),
    ))

    for line_num, line in enumerate(annoFile, 1):

        # Strip any leading UTF-8 BOM, then strip any
        # leading/trailing whitespace.
        stripped_line = line.lstrip(codecs.BOM_UTF8).strip()

        # Ignore empty lines.
        if stripped_line == '':
            continue

        # Split the line into words/tokens.
        unstripped_words = stripped_line.split(';')
        # Strip leading and trailing whitespace from each token.
        words = [w.strip() for w in unstripped_words]

        # Check that all expected words/tokens are there.
        is_valid_format_with_label = (len(words) == num_words_with_label)
        is_valid_format_without_label = (len(words) == num_words_without_label)
        words_format_is_valid = (
            (expecting_labels and is_valid_format_with_label)
            or (not expecting_labels and (is_valid_format_with_label or is_valid_format_without_label))
        )
        if expecting_labels:
            num_words_expected = num_words_with_label
        else:
            num_words_expected = num_words_without_label

        if not words_format_is_valid:
            annotation_dict.close()
            annoFile.close()
            raise FileContentError(file_error_format_str.format(
                line_num=line_num,
                line=stripped_line,
                error=str_consts.ANNOTATION_FILE_TOKEN_COUNT_ERROR_FMTSTR.format(
                    num_words_expected=num_words_expected,
                    num_words_found=len(words),
                )
            ))

        # Encode the line data into a dictionary: {'value1':'Shore2', 'row':'575', ...}
        if is_valid_format_with_label:
            lineData = dict(zip(words_format_with_label, words))
        else:  # valid format without label
            lineData = dict(zip(words_format_without_label, words))

        try:
            row = int(lineData['row'])
            if row <= 0:
                raise ValueError
        except ValueError:
            annotation_dict.close()
            annoFile.close()
            raise FileContentError(file_error_format_str.format(
                line_num=line_num,
                line=stripped_line,
                error=str_consts.ANNOTATION_FILE_ROW_NOT_POSITIVE_INT_ERROR_FMTSTR.format(row=lineData['row']),
            ))

        try:
            col = int(lineData['col'])
            if col <= 0:
                raise ValueError
        except ValueError:
            annotation_dict.close()
            annoFile.close()
            raise FileContentError(file_error_format_str.format(
                line_num=line_num,
                line=stripped_line,
                error=str_consts.ANNOTATION_FILE_COL_NOT_POSITIVE_INT_ERROR_FMTSTR.format(column=lineData['col']),
            ))

        if expecting_labels:
            # Check that the label code corresponds to a label in the database
            # and in the source's labelset.
            # Only check this if the label code hasn't been seen before
            # in the annotations file.

            label_code = lineData['label']
            if label_code not in uniqueLabelCodes:

                labelObjs = Label.objects.filter(code=label_code)
                if len(labelObjs) == 0:
                    annotation_dict.close()
                    annoFile.close()
                    raise FileContentError(file_error_format_str.format(
                        line_num=line_num,
                        line=stripped_line,
                        error=str_consts.ANNOTATION_FILE_LABEL_NOT_IN_DATABASE_ERROR_FMTSTR.format(label_code=label_code),
                    ))

                labelObj = labelObjs[0]
                if labelObj not in source.labelset.labels.all():
                    annotation_dict.close()
                    annoFile.close()
                    raise FileContentError(file_error_format_str.format(
                        line_num=line_num,
                        line=stripped_line,
                        error=str_consts.ANNOTATION_FILE_LABEL_NOT_IN_LABELSET_ERROR_FMTSTR.format(label_code=label_code),
                    ))

                uniqueLabelCodes.append(label_code)

        # Get and check the photo year to make sure it's valid.
        # We'll assume the year is the first 4 characters of the date.
        year = lineData['date'][:4]
        try:
            datetime.date(int(year),1,1)
        # The year is not coercible to int, or is out of range (e.g. 0 or negative).
        except ValueError:
            annotation_dict.close()
            annoFile.close()
            raise FileContentError(file_error_format_str.format(
                line_num=line_num,
                line=stripped_line,
                error=str_consts.ANNOTATION_FILE_YEAR_ERROR_FMTSTR.format(year=year),
            ))

        # TODO: Check if the row and col in this line are a valid row and col
        # for the image.  Need the image to do that, though...


        # Use the location values and the year to build a string identifier for the image, such as:
        # Shore1;Reef5;...;2008
        valueList = [lineData['value'+str(i)] for i in range(1,numOfKeys+1)]
        imageIdentifier = get_image_identifier(valueList, year)

        # Add/update a dictionary entry for the image with this identifier.
        # The dict entry's value is a list of labels.  Each label is a dict:
        # {'row':'484', 'col':'320', 'label':'POR'}
        if imageIdentifier not in annotation_dict:
            annotation_dict[imageIdentifier] = []

        # Append the annotation as a dict containing row, col, and label
        # (or just row and col, if no labels).
        #
        # Can't append directly to annotation_dict[imageIdentifier], due to
        # how shelved dicts work. So we use this pattern with a temporary
        # variable.
        # See http://docs.python.org/library/shelve.html?highlight=shelve#example
        tmp_data = annotation_dict[imageIdentifier]
        if expecting_labels:
            tmp_data.append(
                dict(row=row, col=col, label=lineData['label'])
            )
        else:
            tmp_data.append(
                dict(row=row, col=col)
            )
        annotation_dict[imageIdentifier] = tmp_data

    annoFile.close()

    return (annotation_dict, annotation_dict_id)
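
The temporary-variable pattern used above when appending to annotation_dict is the standard workaround for how shelve handles mutable values. A minimal standalone illustration of the same pattern, independent of the annotation code and using a throwaway shelf file:

import shelve

shelf = shelve.open('/tmp/example_shelf')
shelf['Shore1;Reef3;2008'] = []

# In-place mutation is silently lost: a plain Shelf only writes data back
# when a value is assigned to shelf[key].
shelf['Shore1;Reef3;2008'].append({'row': '695', 'col': '802', 'label': 'POR'})

# Read-modify-write through a temporary variable persists the change.
tmp = shelf['Shore1;Reef3;2008']
tmp.append({'row': '695', 'col': '802', 'label': 'POR'})
shelf['Shore1;Reef3;2008'] = tmp

print(shelf['Shore1;Reef3;2008'])  # one annotation dict, not two
shelf.close()

Alternatively, shelve.open(..., writeback=True) makes in-place mutation work, at the cost of caching every accessed entry in memory and writing them all back on sync/close; the explicit reassignment used in annotations_file_to_python avoids that overhead.
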
Example #3
def store_csv_file(csv_file, source):
    """
    Temporarily stores the uploaded csv_file's contents using Python's
    shelve module. Also does a few error checks, such as whether a row
    has too many or too few values, or whether the same filename appears
    more than once in the file.

    Returns a tuple of (csv_dict, csv_dict_id): the parsed metadata as a
    plain dict keyed by filename, and the id of the shelf file it was
    saved under.
    """

    # TODO: If we return the whole CSV dict to the Javascript side anyway,
    # then we don't really need to keep a shelved version of the dict
    # on the server side. That's redundant.

    if not os.access(settings.SHELVED_ANNOTATIONS_DIR, os.R_OK | os.W_OK):
        # Don't catch this error and display it to the user.
        # Just let it become a server error to be e-mailed to the admins.
        raise DirectoryAccessError(
            "The SHELVED_ANNOTATIONS_DIR either does not exist, is not readable, or is not writable. Please rectify this."
        )
    csv_dict_id = rand_string(10)
    csv_dict = dict()

    # splitlines() is to do system-agnostic handling of newline characters.
    # The csv module can't do that by default (it fails on CR-only line endings).
    reader = csv.reader(csv_file.read().splitlines(), dialect='excel')
    num_keys = source.num_of_keys()
    filenames_processed = []

    fields = (['photo_date'] +
              ['value1', 'value2', 'value3', 'value4', 'value5'][:num_keys] +
              ['height_in_cm', 'latitude', 'longitude',
               'depth', 'camera', 'photographer', 'water_quality',
               'strobes', 'framing', 'balance'])

    for row in reader:
        metadata_for_file = {}

        # Gets filename, strips any UTF-8 BOM from the start of the CSV line.
        filename = row.pop(0).lstrip(codecs.BOM_UTF8)
        # Checks if we already found data for this filename.
        if filename in filenames_processed:
            raise FileContentError('metadata for file "{file}" found twice in CSV file.'.format(
                file=filename,
            ))

        filenames_processed.append(filename)

        if len(row) > len(fields):
            raise FileContentError("{file}: Too many metadata values.".format(file=filename))
        if len(row) < len(fields):
            raise FileContentError("{file}: Too few metadata values.".format(file=filename))

        # Num of comma-separated values equals num of expected fields.
        # Get the metadata from the CSV row.
        for field_name, value in zip(fields, row):
            metadata_for_file[field_name] = value

        csv_dict[filename] = metadata_for_file

    csv_shelf_dict = shelve.open(os.path.join(
        settings.SHELVED_ANNOTATIONS_DIR,
        'csv_source{source_id}_{dict_id}.db'.format(
            source_id=source.id,
            dict_id=csv_dict_id,
        ),
    ))
    for k, v in csv_dict.iteritems():
        csv_shelf_dict[k] = v
    csv_shelf_dict.close()

    return (csv_dict, csv_dict_id)
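
The shelf written at the end of store_csv_file can be reopened later by its id. A sketch of a hypothetical load_csv_shelf() helper, assuming the same SHELVED_ANNOTATIONS_DIR setting and mirroring the filename pattern used above:

import os
import shelve

def load_csv_shelf(shelved_dir, source_id, csv_dict_id):
    # Hypothetical helper: reopen the per-source CSV shelf written by
    # store_csv_file(), using the same 'csv_source<id>_<dict_id>.db'
    # naming scheme, and return its contents as a plain dict.
    shelf_path = os.path.join(
        shelved_dir,
        'csv_source{source_id}_{dict_id}.db'.format(
            source_id=source_id,
            dict_id=csv_dict_id,
        ),
    )
    shelf = shelve.open(shelf_path)
    try:
        return dict(shelf)
    finally:
        shelf.close()

Usage would look something like load_csv_shelf(settings.SHELVED_ANNOTATIONS_DIR, source.id, csv_dict_id), with csv_dict_id being the value returned by store_csv_file.
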