def run(verbose=True):
    ds = Dataset(name='2010 Census Tracts',
                 cached=datetime.utcnow().replace(tzinfo=utc),
                 cache_max_age=1000,
                 remote_id_field='GEOID10',
                 name_field='NAMELSAD10',
                 lat_field='INTPTLAT10',
                 lon_field='INTPTLON10',
                 field1_en='Land Area',
                 field1_name='ALAND10',
                 field2_en='Water Area',
                 field2_name='AWATER10')
    tract_mapping = {
        'remote_id': ds.remote_id_field,
        'name': ds.name_field,
        'lat': ds.lat_field,
        'lon': ds.lon_field,
        'field1': ds.field1_name,
        'field2': ds.field2_name,
        'mpoly': 'MULTIPOLYGON',
    }
    tract_shp = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             'data/tl_2010_36_tract10.shp'))
    lm = LayerMapping(MapPolygon, tract_shp, tract_mapping,
                      transform=False, encoding='iso-8859-1')
    lm.save(strict=True, verbose=verbose)
    ds.save()
    MapPolygon.objects.filter(dataset=None).update(dataset=ds)
def dataset_upload(request):
    user = request.user
    if request.method == 'POST':
        if user.is_authenticated():
            uploaded_file = request.FILES.get('filename', '')
            file_name = uploaded_file.name
            dest_dir = os.path.join(settings.USR_DATASET_ROOT, user.username)
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            full_path = os.path.join(dest_dir, file_name)
            rel_path = os.path.join(user.username, file_name)
            with open(full_path, "wb+") as destination:
                for chunk in uploaded_file.chunks():
                    destination.write(chunk)
            description = request.POST['description']
            access = request.POST['access']
            tbl_separator = {"tab": '\t', "space": ' ', "comma": ',', "semicolon": ';'}
            sep_str = request.POST['sep']
            sep = tbl_separator[sep_str]
            header = request.POST['header']
            if header == 'yes':
                header = True
            elif header == 'no':
                header = False
            # a simple check: count the rows and take the column count from the last line
            size = 0
            dim = 0
            for line in open(full_path):
                size += 1
                dim = len(line.split(sep))
            if header:
                size -= 1  # exclude the header line
            new_dataset = Dataset(owner=user, path=rel_path, name=file_name,
                                  dim=dim, size=size, description=description,
                                  access=access, sep=sep_str, header=header)
            new_dataset.save()
            notice = "Congratulations! Your dataset has been successfully uploaded."
            # return render_to_response('dataset/success.html',
            #                           RequestContext(request, {'dataset': new_dataset, 'notice': notice}))
            return HttpResponseRedirect('/datasets/%s/' % new_dataset.id)
        else:
            notice = "You must be logged in to upload datasets"
            form = UploadDatasetForm()
            return render_to_response('dataset/upload.html',
                                      RequestContext(request, {'form': form, 'notice': notice}))
    else:
        form = UploadDatasetForm()
        return render_to_response('dataset/upload.html',
                                  RequestContext(request, {'form': form}))
def test_make_FragmentationSpectrum_with_centroids(self):
    d1 = Dataset(name='Dataset1')
    d1.save()
    f1 = FragmentationSpectrum(precursor_mz='123.456', spec_num=0, dataset=d1)
    mzs = [10., 20., 50.]
    ints = [1., 1., 1.]
    f1.set_centroid_mzs(mzs)
    f1.set_centroid_ints(ints)
    f1.save()
    np.testing.assert_array_almost_equal(mzs, f1.centroid_mzs)
    np.testing.assert_array_almost_equal(ints, f1.centroid_ints)
def test_is_not_empty_if_annotated(self):
    m1 = Molecule(name='test', sum_formula="C1H2O3")
    m1.save()
    s1 = Standard(molecule=m1)
    s1.save()
    d1 = Dataset()
    d1.save()
    fs1 = FragmentationSpectrum(ms1_intensity=42, dataset=d1, standard=s1)
    fs1.save()
    molecule_table, molecules_with_spectra = self.get_table_and_count()
    self.assertEqual(len(molecule_table.rows), 1)
    self.assertEqual(molecules_with_spectra, 1)
def test_xic_and_standard_and_adduct(self):
    # create some datasets
    d1 = Dataset(name='Dataset1')
    d1.save()
    a1 = Adduct(nM=1, delta_formula='-H', charge=-1)
    a1.save()
    m1 = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
    m1.save()
    s1 = Standard(molecule=m1, inventory_id="0")
    s1.save()
    # create some xics
    x1 = Xic(mz=60.993, dataset=d1)
    xic = [1.0, 2.0, 3.0, 4.0, 5.0]
    x1.set_xic(xic)
    x1.standard = s1
    x1.adduct = a1
    x1.save()
    self.assertEqual(Xic.objects.all().count(), 1)
    self.assertEqual(Dataset.objects.all().count(), 1)
    self.assertEqual(Standard.objects.all().count(), 1)
    # mass check
    with self.assertRaises(ValueError):
        x1.mz = 123.993
        x1.save()
        x1.check_mass()


def test_xic_mass_filter(self):
    d1 = Dataset(name='dataset')
    d1.save()
    mz = 60.993
    # three larger
    Xic(mz=mz + 5., dataset=d1).save()
    Xic(mz=mz + 10., dataset=d1).save()
    Xic(mz=mz + 15., dataset=d1).save()
    # three approx equal
    Xic(mz=mz + 0.005, dataset=d1).save()
    Xic(mz=mz + 0.0, dataset=d1).save()
    Xic(mz=mz - 0.0015, dataset=d1).save()
    # three smaller
    Xic(mz=mz - 5., dataset=d1).save()
    Xic(mz=mz - 10., dataset=d1).save()
    Xic(mz=mz - 15., dataset=d1).save()
    # three approx equal from another dataset
    d2 = Dataset(name='dataset2')
    d2.save()
    Xic(mz=mz + 0.005, dataset=d2).save()
    Xic(mz=mz + 0.0, dataset=d2).save()
    Xic(mz=mz - 0.0015, dataset=d2).save()
    self.assertEqual(Xic.objects.all().count(), 12)
    # select the XICs within +/- 0.01 of the target m/z for this dataset
    xics = Xic.objects.filter(dataset=d1).filter(mz__gte=mz - 0.01).filter(mz__lte=mz + 0.01)
    self.assertEqual(xics.count(), 3)
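# The tests above rely on Xic.check_mass() raising ValueError when the stored
# m/z drifts away from the mass expected for the standard plus adduct. The
# model's real implementation is not shown in these snippets; the helper below
# is only a hypothetical sketch of that kind of ppm-tolerance check, where
# expected_mz stands in for whatever the project computes from the standard
# and adduct.
def check_mass_sketch(xic_mz, expected_mz, tolerance_ppm=10.0):
    """Raise ValueError if xic_mz deviates from expected_mz by more than tolerance_ppm."""
    ppm_error = abs(xic_mz - expected_mz) / expected_mz * 1e6
    if ppm_error > tolerance_ppm:
        raise ValueError("m/z {} differs from expected {} by {:.1f} ppm".format(
            xic_mz, expected_mz, ppm_error))
    return True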
def test_add_xic(self):
    m1 = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
    m1.save()
    s1 = Standard(molecule=m1, inventory_id="0")
    s1.save()
    a1 = Adduct(nM=1, delta_formula='+H+K', charge=-2)
    a1.save()
    d1 = Dataset(name='Dataset1')
    d1.save()
    d1.standards_present.add(s1)
    d1.adducts_present.add(a1)
    x1 = Xic(mz='0.0', dataset=d1)
    xic = [1.0, 2.0, 3.0, 4.0, 5.0]
    x1.set_xic(xic)
    x1.save()
    self.assertEqual(Xic.objects.all().count(), 1)
    np.testing.assert_array_almost_equal(xic, x1.xic)
@classmethod
def setUpTestData(cls):
    d1 = Dataset(name='Dataset1')
    d1.save()
    m1 = Molecule(sum_formula='H2O')
    m1.save()
    m2 = Molecule(sum_formula='O2')
    m2.save()
    s1 = Standard(molecule=m1)
    s1.save()
    s2 = Standard(molecule=m1)
    s2.save()
    s3 = Standard(molecule=m2)
    s3.save()
    FragmentationSpectrum.objects.create(precursor_mz='123.456', spec_num=0, dataset=d1, standard=s1)
    FragmentationSpectrum.objects.create(precursor_mz='123.45', spec_num=0, dataset=d1, standard=s2)
    FragmentationSpectrum.objects.create(precursor_mz='123.4', spec_num=0, dataset=d1, standard=s3)
    cls.m_onespectrum = m2
    cls.m_twospectra = m1
def test_add_dataset(self):
    # create standards
    m1 = Molecule(name='TestMolecule1', sum_formula="C1H2O3")
    m1.save()
    m2 = Molecule(name='TestMolecule1', sum_formula="C2H2O3")
    m2.save()
    s1 = Standard(molecule=m1, inventory_id="0")
    s1.save()
    s2 = Standard(molecule=m2, inventory_id="1")
    s2.save()
    # create adduct
    a1 = Adduct(nM=1, delta_formula='+H+K', charge=-2)
    a1.save()
    # create a dataset
    d1 = Dataset(name='Dataset1')
    d1.save()
    d1.standards_present.add(s1)
    d1.standards_present.add(s2)
    d1.adducts_present.add(a1)
    self.assertEqual(Dataset.objects.all().count(), 1)
    self.assertEqual(Dataset.objects.all()[0].standards_present.count(), 2)
def dataset_upload(request):
    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            post_dict = dict(request.POST)
            files_dict = dict(request.FILES)
            logging.debug(files_dict)
            logging.debug(post_dict)
            data = {"adducts": post_dict['adducts'],
                    "standards": post_dict['standards'],
                    "mass_accuracy_ppm": post_dict['mass_accuracy_ppm'][0],
                    "quad_window_mz": post_dict['quad_window_mz'][0],
                    "lc_info": post_dict['lc_info'][0],
                    "ms_info": post_dict['ms_info'][0]}
            uploaded_file = request.FILES['mzml_file']
            base_name, extension = os.path.splitext(uploaded_file.name)
            d = Dataset(name=uploaded_file.name, processing_finished=False)
            d.save()
            mzml_filename = "{}-{}{}".format(base_name, d.id, extension)
            mzml_filepath = os.path.join(settings.MEDIA_ROOT, mzml_filename)
            logging.debug("mzML filepath: " + mzml_filepath)
            logging.debug("original mzML filename: " + uploaded_file.name)
            # write the upload in binary mode so the chunks are stored byte-for-byte
            with open(mzml_filepath, 'wb') as destination:
                for chunk in uploaded_file.chunks():
                    destination.write(chunk)
            d.path = mzml_filepath
            d.save()
            tasks.handle_uploaded_files.delay(data, mzml_filepath, d)
            return redirect('dataset-list')
    else:
        form = UploadFileForm(initial={"mass_accuracy_ppm": 10.0, 'quad_window_mz': 1.0})
    autocomplete = {
        'lc_info': [str(info.content) for info in LcInfo.objects.all()],
        'ms_info': [str(info.content) for info in MsInfo.objects.all()],
    }
    return render(request, 'mcf_standards_browse/dataset_upload.html',
                  {'form': form, 'autocomplete': autocomplete})
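# The view above defers all parsing to tasks.handle_uploaded_files and only
# records the Dataset with processing_finished=False. That task is not part of
# this snippet; the sketch below only illustrates the assumed contract (do the
# heavy work asynchronously, then flip the flag), not the project's actual
# implementation.
from celery import shared_task


@shared_task
def handle_uploaded_files_sketch(metadata, mzml_filepath, dataset):
    """Sketch: process the uploaded mzML file, then mark the dataset as finished."""
    # ... extraction of XICs and fragmentation spectra for the configured
    # adducts/standards would go here ...
    dataset.processing_finished = True
    dataset.save()

# Note: passing the Dataset instance itself through .delay(), as the view does,
# requires a pickle-capable task serializer; passing d.id and re-fetching the
# row inside the task is the more common pattern.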
def test_make_FragmentationSpectrum(self):
    d1 = Dataset(name='Dataset1')
    d1.save()
    FragmentationSpectrum(precursor_mz='123.456', spec_num=0, dataset=d1).save()
    self.assertEqual(FragmentationSpectrum.objects.all().count(), 1)
def hazardous_waste(year=2011, verbose=True):
    try:
        dataset = Dataset.objects.get(name="Hazardous Waste Sites " + str(year))
        dataset.cached = datetime.utcnow().replace(tzinfo=utc)
        dataset.save()
    except ObjectDoesNotExist:
        coor = GeoCoordinates(lat_field="Latitude",
                              lon_field="Longitude")
        coor.save()
        names = DatasetNameField(field1_en="Generator Status",
                                 field1_name="Generator Status",
                                 field2_en="Biennial Report Link",
                                 field2_name="Biennial Report Link")
        names.save()
        location = Location(street_field="Address",
                            city_field="City",
                            state_field="State",
                            zipcode_field="ZIP Code",
                            county_field="County")
        location.save()
        dataset = Dataset(name="Hazardous Waste Sites " + str(year),
                          url='/data/ej/' + str(year) + '/',
                          cached=datetime.utcnow().replace(tzinfo=utc),
                          cache_max_age=1000,
                          remote_id_field="Handler ID",
                          name_field="Handler Name",
                          location=location,
                          coordinates=coor,
                          names=names,
                          needs_geocoding=False)
        dataset.save()
    MapPoint.objects.filter(dataset=dataset).delete()
    for state in ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
                  'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
                  'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
                  'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
                  'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']:
        short_name = 'Envirofacts_Biennial_Report_Search ' + state + '.CSV'
        path = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                            'data/ej/' + str(year) + '/' + short_name))
        if not os.path.isfile(path):
            if verbose:
                print 'No file %s exists.' % (short_name)
            short_name = str(year) + ' ' + state + '.CSV'
            path = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                                'data/ej/' + str(year) + '/' + short_name))
            if not os.path.isfile(path):
                if verbose:
                    print 'No file %s exists.' % (short_name)
                continue
        if verbose:
            print 'Opening file %s' % (short_name)
        readfile = csv.reader(open(path, 'rb'))
        # verify the header and record the column index for each mapped field
        row = readfile.next()
        locs = {}
        for i in range(len(row)):
            if row[i] == dataset.remote_id_field:
                locs['remote_id'] = i
            elif row[i] == dataset.name_field:
                locs['name'] = i
            elif row[i] == dataset.location.street_field:
                locs['street'] = i
            elif row[i] == dataset.location.city_field:
                locs['city'] = i
            elif row[i] == dataset.location.state_field:
                locs['state'] = i
            elif row[i] == dataset.location.zipcode_field:
                locs['zipcode'] = i
            elif row[i] == dataset.location.county_field:
                locs['county'] = i
            elif row[i] == dataset.coordinates.lat_field:
                locs['lat'] = i
            elif row[i] == dataset.coordinates.lon_field:
                locs['lon'] = i
            elif row[i] == dataset.names.field1_name:
                locs['field1'] = i
            elif row[i] == dataset.names.field2_name:
                locs['field2'] = i
        for row in readfile:
            kwargs = {'dataset': dataset}
            for key in locs:
                if key in ['lat', 'lon']:
                    try:
                        kwargs[key] = float(row[locs[key]])
                    except Exception:
                        kwargs[key] = 0.
                elif MapPoint._meta.get_field(key).max_length < len(row[locs[key]]):
                    kwargs[key] = row[locs[key]][:MapPoint._meta.get_field(key).max_length]
                else:
                    kwargs[key] = row[locs[key]]
            try:
                kwargs['point'] = Point(kwargs['lon'], kwargs['lat'])
            except Exception:
                if verbose:
                    print '\tInvalid lat/long for row: %s' % (row)
                    print '\tLat: %f Lon: %f' % (kwargs['lat'], kwargs['lon'])
                continue
            mp = MapPoint(**kwargs)
            mp.save()
        if verbose:
            print 'File "%s" done processing' % (short_name)
def run(verbose=True, year=2010, starting_state=1):
    yn = ''
    # https://docs.djangoproject.com/en/1.7/ref/contrib/gis/layermapping/
    while DEBUG and yn != 'y':
        yn = raw_input('This process can be memory-intensive if '
                       'DEBUG = True in settings as this logs all SQL. '
                       'DEBUG is currently True. Please set this to False '
                       'if you are experiencing issues. Continue (y/n)?') \
            .lower().strip()
        if yn == 'n':
            return
    dataset_qs = Dataset.objects.filter(name__exact=str(year) + ' Census Tracts')
    if len(dataset_qs) > 0:
        ds = dataset_qs[0]
        ds.cached = datetime.utcnow().replace(tzinfo=utc)
        ds.save()
    else:
        coor = GeoCoordinates(lat_field='INTPTLAT' + str(year)[-2:],
                              lon_field='INTPTLON' + str(year)[-2:])
        coor.save()
        names = DatasetNameField(field1_en='Land Area',
                                 field1_name='ALAND' + str(year)[-2:],
                                 field2_en='Water Area',
                                 field2_name='AWATER' + str(year)[-2:])
        names.save()
        ds = Dataset(name=str(year) + ' Census Tracts',
                     cached=datetime.utcnow().replace(tzinfo=utc),
                     cache_max_age=1000,
                     name_field='NAMELSAD' + str(year)[-2:],
                     coordinates=coor,
                     names=names)
        if year == 2010:
            ds.remote_id_field = 'GEOID10'
        elif year == 2000:
            ds.remote_id_field = 'CTIDFP00'
        ds.save()
    tract_mapping = {
        'remote_id': ds.remote_id_field,
        'name': ds.name_field,
        'lat': ds.coordinates.lat_field,
        'lon': ds.coordinates.lon_field,
        'field1': ds.names.field1_name,
        'field2': ds.names.field2_name,
        'mpoly': 'MULTIPOLYGON',
    }
    ftp = ftplib.FTP('ftp2.census.gov')
    ftp.login()
    ftp.cwd("/geo/tiger/TIGER2010/TRACT/" + str(year) + "/")
    files = ftp.nlst()
    MapPolygon.objects.filter(dataset_id__isnull=True).delete()
    max_state = MapPolygon.objects.filter(dataset_id__exact=ds.id).aggregate(Max('remote_id'))
    max_state = max_state['remote_id__max']
    if max_state is not None:
        try:
            # remote_id begins with the 2-digit state FIPS code
            max_state = int(max_state) / 1000000000
            if max_state >= starting_state:
                starting_state = max_state + 1
        except Exception:
            pass
    for i in [format(x, '02d') for x in range(starting_state, 100)]:
        short_name = 'tl_2010_' + i + '_tract' + str(year)[-2:]
        tract_shp = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data/' + short_name))
        if (not os.path.isfile(tract_shp + '.shp')
                or not os.path.isfile(tract_shp + '.shx')
                or not os.path.isfile(tract_shp + '.shp.xml')
                or not os.path.isfile(tract_shp + '.prj')
                or not os.path.isfile(tract_shp + '.dbf')):
            if short_name + '.zip' not in files:
                continue
            if verbose:
                print short_name + '.shp does not exist locally.\n\tDownloading from Census FTP...'
            try:
                # download the file
                local_file = open(tract_shp + '.zip', 'wb')
                ftp.retrbinary('RETR ' + short_name + '.zip', local_file.write)
                local_file.close()
                # open the zip
                zipped = zipfile.ZipFile(tract_shp + '.zip')
                for suffix in ['.shp', '.prj', '.dbf', '.shp.xml', '.shx']:
                    zipped.extract(short_name + suffix,
                                   os.path.abspath(os.path.join(os.path.dirname(__file__), 'data')))
            except Exception as inst:
                if verbose:
                    print '\tException:', inst
                    print '\t' + short_name + '.shp did not download or unzip correctly. Moving on...'
                continue
        tract_shp = tract_shp + '.shp'
        if verbose:
            print '\tBegin layer mapping...'
        lm = LayerMapping(MapPolygon, tract_shp, tract_mapping,
                          transform=False, encoding='iso-8859-1')
        while True:
            try:
                lm.save(strict=True, verbose=False)  # verbose
                break
            # exception part is untested, error didn't happen again
            except Exception as inst:
                yn = ''
                while yn not in ['n', 'y']:
                    yn = raw_input('Error saving: ' + str(inst) + '\nContinue (y/n)?').strip().lower()
                if yn == 'y':
                    MapPolygon.objects.filter(dataset_id__isnull=True).filter(remote_id__startswith=i).delete()
                else:
                    break
        if verbose:
            print '\tLayer mapping done.'
        MapPolygon.objects.filter(dataset=None).update(dataset=ds)
        if verbose:
            print '\tLayer associated with dataset.'
    ftp.quit()
    if verbose:
        print 'All shapefiles added.'
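# Worked example of the resume arithmetic in run() above (the GEOID value is
# illustrative): a 2010 tract remote_id is state FIPS (2 digits) + county
# (3 digits) + tract (6 digits), so stripping the trailing nine digits with
# integer division leaves the state FIPS code the loop should resume after.
assert int('36061014300') / 10 ** 9 == 36  # Python 2 floor division; use // on Python 3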
class NDDataset:
    """Dataset interface for each cache Dataset"""

    def __init__(self, dataset_name):
        """Initialize the dataset"""
        self.dataset_name = dataset_name
        self.channel_list = []
        self.db = CacheDB()
        try:
            self.ds = Dataset.objects.get(dataset_name=dataset_name)
        except ObjectDoesNotExist:
            self.fetchDataset()
        self.populateDataset()

    def fetchDataset(self):
        """Fetch a dataset to the list of cacheable datasets"""
        token = self.dataset_name.split('-')[0]
        try:
            json_info = json.loads(getURL('http://{}/ocpca/{}/info/'.format(settings.SERVER, token)))
        except Exception as e:
            logger.error("Token {} does not exist on the backend {}".format(token, settings.SERVER))
            raise NDTILECACHEError("Token {} does not exist on the backend {}".format(token, settings.SERVER))

        ximagesize, yimagesize, zimagesize = json_info['dataset']['imagesize']['0']
        xoffset, yoffset, zoffset = json_info['dataset']['offset']['0']
        xvoxelres, yvoxelres, zvoxelres = json_info['dataset']['voxelres']['0']
        scalinglevels = json_info['dataset']['scalinglevels']
        scalingoption = ND_scalingtoint[json_info['dataset']['scaling']]
        starttime, endtime = json_info['dataset']['timerange']
        project_name = json_info['project']['name']
        s3backend = json_info['project']['s3backend']

        self.ds = Dataset(dataset_name=self.dataset_name, ximagesize=ximagesize, yimagesize=yimagesize,
                          zimagesize=zimagesize, xoffset=xoffset, yoffset=yoffset, zoffset=zoffset,
                          xvoxelres=xvoxelres, yvoxelres=yvoxelres, zvoxelres=zvoxelres,
                          scalingoption=scalingoption, scalinglevels=scalinglevels,
                          starttime=starttime, endtime=endtime,
                          project_name=project_name, s3backend=s3backend)
        self.ds.save()

        for channel_name in json_info['channels'].keys():
            channel_type = json_info['channels'][channel_name]['channel_type']
            channel_datatype = json_info['channels'][channel_name]['datatype']
            startwindow, endwindow = json_info['channels'][channel_name]['windowrange']
            propagate = json_info['channels'][channel_name]['propagate']
            readonly = json_info['channels'][channel_name]['readonly']
            ch = Channel(channel_name=channel_name, dataset=self.ds, channel_type=channel_type,
                         channel_datatype=channel_datatype, startwindow=startwindow, endwindow=endwindow,
                         propagate=propagate, readonly=readonly)
            ch.save()

    def populateDataset(self):
        """Populate a dataset information using the information stored"""
        self.resolutions = []
        self.cubedim = {}
        self.supercubedim = {}
        self.imagesz = {}
        self.offset = {}
        self.voxelres = {}
        self.scale = {}
        self.timerange = [self.ds.starttime, self.ds.endtime]

        for i in range(self.ds.scalinglevels + 1):
            # add this level to the resolutions
            self.resolutions.append(i)

            # set the image size: the scaled down image rounded up to the nearest cube
            xpixels = ((self.ds.ximagesize - 1) / 2 ** i) + 1
            ypixels = ((self.ds.yimagesize - 1) / 2 ** i) + 1
            if self.ds.scalingoption == ZSLICES:
                zpixels = self.ds.zimagesize
            else:
                zpixels = ((self.ds.zimagesize - 1) / 2 ** i) + 1
            self.imagesz[i] = [xpixels, ypixels, zpixels]

            # set the offset
            xoffseti = 0 if self.ds.xoffset == 0 else ((self.ds.xoffset) / 2 ** i)
            yoffseti = 0 if self.ds.yoffset == 0 else ((self.ds.yoffset) / 2 ** i)
            if self.ds.zoffset == 0:
                zoffseti = 0
            else:
                if self.ds.scalingoption == ZSLICES:
                    zoffseti = self.ds.zoffset
                else:
                    zoffseti = ((self.ds.zoffset) / 2 ** i)
            self.offset[i] = [xoffseti, yoffseti, zoffseti]

            # set the voxelresolution
            xvoxelresi = self.ds.xvoxelres * float(2 ** i)
            yvoxelresi = self.ds.yvoxelres * float(2 ** i)
            zvoxelresi = self.ds.zvoxelres if self.ds.scalingoption == ZSLICES else self.ds.zvoxelres * float(2 ** i)
            self.voxelres[i] = [xvoxelresi, yvoxelresi, zvoxelresi]
            self.scale[i] = {'xy': xvoxelresi / yvoxelresi,
                             'yz': zvoxelresi / xvoxelresi,
                             'xz': zvoxelresi / yvoxelresi}

            # choose the cubedim as a function of the zscale
            # this may need to be changed.
            if self.ds.scalingoption == ZSLICES:
                if float(self.ds.zvoxelres / self.ds.xvoxelres) / (2 ** i) > 0.5:
                    self.cubedim[i] = [128, 128, 16]
                else:
                    self.cubedim[i] = [64, 64, 64]

                if self.ds.s3backend == S3_TRUE:
                    self.supercubedim[i] = map(mul, self.cubedim[i], SUPERCUBESIZE)
                else:
                    self.supercubedim[i] = self.cubedim[i]

                # Make an exception for bock11 data -- just an inconsistency in original ingest
                if self.ds.ximagesize == 135424 and i == 5:
                    self.cubedim[i] = [128, 128, 16]
            else:
                # RB what should we use as a cubedim?
                self.cubedim[i] = [128, 128, 16]

    def removeDataset(self):
        """Remove a dataset"""
        self.ds.delete()
        try:
            shutil.rmtree("{}/{}".format(settings.CACHE_DIR, self.dataset_name))
        except Exception as e:
            logger.error("Failed to remove dataset directories at {}. Error {}. Manual cleanup may be necessary.".format(self.dataset_name, e))
            raise NDTILECACHEError("Failed to remove dataset directories at {}. Error {}. Manual cleanup may be necessary.".format(self.dataset_name, e))

    # Accessors
    def getDatasetName(self):
        return self.ds.dataset_name

    def getDatasetId(self):
        return self.ds.dataset_id

    def getS3Backend(self):
        return self.ds.s3backend

    def getImageSize(self, resolution):
        return self.imagesz[resolution]

    def getVoxelRes(self, resolution):
        return self.voxelres[resolution]

    def getProjectName(self):
        return self.ds.project_name

    def getChannelObj(self, channel_name):
        """Return a channel object"""
        return NDChannel(channel_name, self.ds)
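# Minimal usage sketch for the cache interface above, assuming the Django
# settings, cache models, and a backend reachable at settings.SERVER are
# already configured; 'kasthuri11-image' is an illustrative token-channel
# name, not something shipped with these snippets.
def print_dataset_summary(dataset_name='kasthuri11-image'):
    """Print the image size and voxel resolution at every cached scaling level."""
    ds = NDDataset(dataset_name)
    for res in ds.resolutions:
        print ds.getDatasetName(), res, ds.getImageSize(res), ds.getVoxelRes(res)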