Beispiel #1
0
def upload_metadata_csv(request):
    """Turn an uploaded tab-separated metadata file into import formsets.

    For any request that is not a POST, or when the upload form does not
    validate, the upload page is rendered with the (unbound or bound) form.
    Otherwise the uploaded file is read row by row and two formsets are
    prepared for the import page: ``document_formset`` with the new
    documents and ``product_formset`` with the identifiers collected per
    valid ISBN.

    Columns read from each row, by index:
        0 title, 1 author, 2 identifier, 3 source, 4 source edition,
        5 source publisher, 6 text searched for an 'SJW' marker and a
        number, 7 production series number ('0' is treated as "none"),
        8 production source flag (only 'D' is recognised).

    A row is dropped when an earlier row of the same file has the same
    non-empty source or the same title/author pair, and when a Document
    with the same identifier or source already exists in the database.

    NOTE: the templates receive ``locals()`` as their context, so the local
    variable names used here are part of the template interface and are
    deliberately left unchanged.
    """
    if request.method != 'POST':
        form = CSVUploadForm()
        return render(request, 'documents/manage_upload_metadata_csv.html', locals())

    form = CSVUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        return render(request, 'documents/manage_upload_metadata_csv.html', locals())

    csv_file = request.FILES['csv']
    initial = []
    products = {}
    seen_sources = set()
    seen_title_author = set()
    # NOTE(review): temporary_file_path() is only available when the upload
    # was spooled to disk -- confirm the configured upload handlers.
    # FIXME: It's pretty annoying to hard code the expected encoding of the csv
    # The with-block guarantees the file handle is closed even if a row
    # cannot be processed.
    with open(csv_file.temporary_file_path()) as csv_handle:
        reader = UnicodeReader(csv_handle, encoding="iso-8859-1", delimiter='\t')
        for row in reader:
            # Presumably blank lines come back as empty rows (as with the
            # stdlib csv reader); skip them instead of failing on row[0].
            if not row:
                continue
            fields = {'title': row[0], 'author': row[1],
                      'identifier': row[2], 'source': row[3],
                      'source_edition': row[4], 'source_publisher': row[5],
                      'language': Document.language_choices[0][0]}
            source = fields['source']
            # A tuple key keeps ('ab', 'c') and ('a', 'bc') apart, which a
            # plain string concatenation would not.
            title_author = (fields['title'], fields['author'])
            # collect all products for a given isbn
            if source and VALID_ISBN_RE.match(source):
                products.setdefault(source, set()).add(fields['identifier'])
            # filter entries in the csv that deal with the same isbn or with
            # the same title/author combo
            if source in seen_sources or title_author in seen_title_author:
                continue
            if source:
                seen_sources.add(source)
            if any(title_author):
                seen_title_author.add(title_author)
            # FIXME: Obviously the following is highly SBS specific
            if row[7] != '0':
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[1][0]
                fields['production_series_number'] = row[7]
            elif 'SJW' in row[6].upper():
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[0][0]
                m = re.search(r'\d+', row[6])  # extract the series number
                if m is not None:
                    fields['production_series_number'] = m.group(0)
            if row[8] == 'D':
                fields['production_source'] = Document.PRODUCTION_SOURCE_CHOICES[0][0]
            initial.append(fields)
    # filter out existing document entries
    new_identifiers = [row['identifier'] for row in initial]
    new_sources = [row['source'] for row in initial if row['source']]
    # Sets make the membership tests below O(1) per row.
    duplicate_identifiers = {
        document.identifier
        for document in Document.objects.filter(identifier__in=new_identifiers)}
    duplicate_sources = {
        document.source
        for document in Document.objects.filter(source__in=new_sources)}
    unique_initial = [row for row in initial
                      if row['identifier'] not in duplicate_identifiers
                      and row['source'] not in duplicate_sources]
    ProductFormset = formset_factory(PartialProductForm, extra=0)
    # One product form per (isbn, product number) pair collected above.
    product_formset = ProductFormset(
        initial=[{'isbn': isbn, 'productNumber': number}
                 for (isbn, v) in products.items() for number in v],
        prefix='products')
    DocumentFormSet = modelformset_factory(
        Document,
        fields=('author', 'title', 'identifier', 'source', 'source_edition',
                'source_publisher', 'language', 'production_series',
                'production_series_number', 'production_source'),
        extra=len(unique_initial), can_delete=True)
    # An empty queryset plus `initial` yields only unsaved, pre-filled forms.
    document_formset = DocumentFormSet(queryset=Document.objects.none(),
                                       initial=unique_initial, prefix='documents')
    return render(request, 'documents/manage_import_metadata_csv.html', locals())
Beispiel #2
0
def upload_metadata_csv(request):
    """Turn an uploaded tab-separated metadata file into a document formset.

    For any request that is not a POST, or when the upload form does not
    validate, the upload page is rendered with the (unbound or bound) form.
    Otherwise the uploaded file is read row by row and a model formset
    (``formset``) pre-filled with the new documents is rendered on the
    import page.

    Columns read from each row, by index:
        0 title, 1 author, 2 identifier, 3 source, 4 source edition,
        5 source publisher, 6 text searched for an 'SJW' marker and a
        number, 7 production series number ('0' is treated as "none").

    Rows whose identifier already belongs to an existing Document are
    dropped.

    NOTE: the templates receive ``locals()`` as their context, so the local
    variable names used here are part of the template interface and are
    deliberately left unchanged.
    NOTE(review): ``render_to_response(..., context_instance=...)`` is a
    legacy Django calling convention -- confirm it is still supported by
    the Django version in use.
    """
    if request.method != 'POST':
        form = CSVUploadForm()
        return render_to_response('documents/manage_upload_metadata_csv.html', locals(),
                                  context_instance=RequestContext(request))

    form = CSVUploadForm(request.POST, request.FILES)
    if not form.is_valid():
        return render_to_response('documents/manage_upload_metadata_csv.html', locals(),
                                  context_instance=RequestContext(request))

    csv_file = request.FILES['csv']
    initial = []
    # NOTE(review): temporary_file_path() is only available when the upload
    # was spooled to disk -- confirm the configured upload handlers.
    # FIXME: It's pretty annoying to hard code the expected encoding of the csv
    # The with-block guarantees the file handle is closed even if a row
    # cannot be processed.
    with open(csv_file.temporary_file_path()) as csv_handle:
        reader = UnicodeReader(csv_handle, encoding="iso-8859-1", delimiter='\t')
        for row in reader:
            # Presumably blank lines come back as empty rows (as with the
            # stdlib csv reader); skip them instead of failing on row[0].
            if not row:
                continue
            fields = {'title': row[0], 'author': row[1],
                      'identifier': row[2], 'source': row[3],
                      'source_edition': row[4], 'source_publisher': row[5],
                      'language': Document.language_choices[0][0]}
            # FIXME: Obviously the following is highly SBS specific
            if row[7] != '0':
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[1][0]
                fields['production_series_number'] = row[7]
            elif 'SJW' in row[6].upper():
                fields['production_series'] = Document.PRODUCTION_SERIES_CHOICES[0][0]
                m = re.search(r'\d+', row[6])  # extract the series number
                if m is not None:
                    fields['production_series_number'] = m.group(0)
            initial.append(fields)
    # filter out existing document entries
    new_identifiers = [row['identifier'] for row in initial]
    # A set makes the membership test below O(1) per row.
    duplicate_identifiers = {
        document.identifier
        for document in Document.objects.filter(identifier__in=new_identifiers)}
    unique_initial = [row for row in initial
                      if row['identifier'] not in duplicate_identifiers]

    DocumentFormSet = modelformset_factory(
        Document,
        fields=('author', 'title', 'identifier', 'source', 'source_edition',
                'source_publisher', 'language', 'production_series',
                'production_series_number'),
        extra=len(unique_initial), can_delete=True)
    # An empty queryset plus `initial` yields only unsaved, pre-filled forms.
    formset = DocumentFormSet(queryset=Document.objects.none(), initial=unique_initial)
    return render_to_response('documents/manage_import_metadata_csv.html', locals(),
                              context_instance=RequestContext(request))