def upload_file(request):
    if request.method == 'POST':
        print(request.FILES)
        all_files = request.FILES.getlist('files[]')
        if len(all_files) > 0:
            try:
                uploaded_file = all_files[0]
                file_name = uploaded_file.name
                filename, file_extension = os.path.splitext(file_name)
                # Check the file extension before accepting the upload.
                if file_extension not in [".csv", ".h5", ".xlsx"]:
                    return JsonResponse({
                        "status": "failure",
                        "message": "Unsupported file format"
                    })
                if not os.path.exists(settings.MEDIA_ROOT):
                    os.makedirs(settings.MEDIA_ROOT)
                file_name = "{}_{}".format(time.time(), file_name)
                file_path = os.path.join(settings.MEDIA_ROOT, file_name)
                fout = open(file_path, 'wb+')
                # Iterate through the chunks.
                for chunk in uploaded_file.chunks():
                    fout.write(chunk)
                fout.close()
                # Save this file to the database
                dataset = Dataset(path=file_name)
                dataset.save()
                return JsonResponse({
                    'status': 'success',
                    'message': 'Data uploaded successfully',
                    'file_path': file_name,
                    'dataset_id': dataset.id
                })
            except Exception as e:
                return JsonResponse({
                    'status': 'failure',
                    'message': 'Error: upload failed: {}'.format(e)
                })
        else:
            return JsonResponse({
                "status": "failure",
                "message": "No file found"
            })
    else:
        print(request)
        return JsonResponse({
            'status': 'failure',
            'message': 'Invalid request'
        })

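# Usage sketch (not part of the original module): exercising the Django view
# above with the test client. The URL "/upload/" is an assumption about the
# project's routing; the view itself only needs a POST with a multipart
# 'files[]' field, matching request.FILES.getlist('files[]').
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import Client


def try_upload_sketch():
    client = Client()
    payload = SimpleUploadedFile("sample.csv", b"col_a,col_b\n1,2\n",
                                 content_type="text/csv")
    # The field name 'files[]' is what the view reads from request.FILES.
    response = client.post("/upload/", {"files[]": payload})
    print(response.json())
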
def make_dataset(user_id, file_id, filter_id):
    """
    Forms a DataSet from the File with the given id by applying the given Filter.
    After filtering, the DataSet is added to the DB.

    :param user_id: id of the User currently working with this data
    :param file_id: id of the File from which the DataSet should be built
    :param filter_id: id of the Filter applied to the File and the total amount of rows for the DataSet
    :return: id of the created DataSet
    """
    # Get the filtering params from the DB.
    filters = Filter.query.get(filter_id).params
    # Get the actual DataFrame, with earlier DataSets formed from the same File dropped.
    dataframe = dataframe_actualization(file_id, user_id)
    # The filtering function returns a list of included row indexes.
    result = filter_apply(dataframe, filters)
    # Transform numpy int64 to regular int to store the array in the DB.
    result = [int(x) for x in result]
    new_dataframe = Dataset(file_id=file_id,
                            user_id=user_id,
                            included_rows=result,
                            filter_id=filter_id)
    db.session.add(new_dataframe)
    db.session.commit()
    return new_dataframe.id

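# Usage sketch for make_dataset(): a minimal illustration of calling it inside
# an application context. The `create_app` factory and the `app` import path
# are assumptions about the surrounding Flask project, and the IDs are
# hypothetical placeholders, not values taken from the original code.
from app import create_app


def make_dataset_example():
    app = create_app()
    with app.app_context():
        new_id = make_dataset(user_id=1, file_id=42, filter_id=7)
        print('Created Dataset with id', new_id)
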
def test_one_article_no_compounds_dataset_model(self):
    # Create article without compound
    art = Article(pmid=12345, journal='Test Journal', year=2018, volume='12',
                  issue='12', pages='1-1000', authors='Douglas Adams',
                  doi='10.1234/HHGTTG', title='HHGTTG',
                  abstract='Test Abstract', num_compounds=0)
    # Get curator
    curator = Curator.query.filter_by(username='******').first()
    self.assertEqual(curator.username, 'test_user')
    # Create empty dataset and add
    ds = Dataset(curator=curator, articles=[art])
    db.session.add(ds)
    db.session.commit()
    # Tests
    self.assertEqual(Dataset.query.count(), 1)
    self.assertEqual(Dataset.query.first().curator.username, 'test_user')
    self.assertEqual(len(Dataset.query.first().articles), 1)
    self.assertEqual(Dataset.query.first().articles[0].pmid, 12345)

def add_dataset():
    tmp_dir = os.path.join(current_app.instance_path,
                           current_app.config["TEMP_DIR"])
    dataset_dir = os.path.join(current_app.instance_path,
                               current_app.config["DATASET_DIR"])
    form = AdminAddDatasetForm()
    if form.validate_on_submit():
        temp_filename = os.path.join(tmp_dir,
                                     secure_filename(form.file_.data.filename))
        if not os.path.exists(temp_filename):
            flash("Internal error: temporary dataset disappeared.", "error")
            return redirect(url_for("admin.add_dataset"))
        name = get_name_from_dataset(temp_filename)
        target_filename = os.path.join(dataset_dir, name + ".json")
        if os.path.exists(target_filename):
            flash("Internal error: file already exists!", "error")
            return redirect(url_for("admin.add_dataset"))
        os.rename(temp_filename, target_filename)
        if not os.path.exists(target_filename):
            flash("Internal error: file moving failed", "error")
            return redirect(url_for("admin.add_dataset"))
        is_demo = dataset_is_demo(target_filename)
        dataset = Dataset(name=name, md5sum=md5sum(target_filename),
                          is_demo=is_demo)
        db.session.add(dataset)
        db.session.commit()
        flash("Dataset %r added successfully." % name, "success")
        return redirect(url_for("admin.add_dataset"))
    return render_template("admin/add.html", title="Add Dataset", form=form)

def insert_sample_datasets(self):
    with open(self.datasets_file, 'r') as dataset_file:
        reader = csv.reader(dataset_file)
        next(reader)  # skip the header row
        for row in reader:
            dataset = Dataset(dataset_id=row[0],
                              annex_uuid=row[1],
                              description=row[2],
                              owner_id=row[3],
                              download_path=row[4],
                              raw_data_url=row[5],
                              name=row[6],
                              modality=row[7],
                              version=row[8],
                              format=row[9],
                              category=row[10],
                              image=self.read_image(row[11]),
                              date_created=datetime.now(),
                              date_updated=datetime.now(),
                              is_private=row[12] == 'True')
            db.session.add(dataset)
        db.session.commit()
        dataset_file.close()

def test_one_article_one_compound_dataset_model(self):
    # Create compound
    cmpd = Compound(name='Methane', smiles='C',
                    source_organism='Saccharomyces cerevisiae')
    # Create article with compound
    art = Article(pmid=12345, journal='Test Journal', year=2018, volume='12',
                  issue='12', pages='1-1000', authors='Douglas Adams',
                  doi='10.1234/HHGTTG', title='HHGTTG',
                  abstract='Test Abstract', num_compounds=1, compounds=[cmpd])
    # Get curator
    curator = Curator.query.filter_by(username='******').first()
    self.assertEqual(curator.username, 'test_user')
    # Create dataset with the article and add it
    ds = Dataset(curator=curator, articles=[art])
    db.session.add(ds)
    db.session.commit()
    # Tests
    self.assertEqual(Dataset.query.count(), 1)
    self.assertEqual(len(Dataset.query.first().articles), 1)
    self.assertEqual(Dataset.query.first().articles[0].pmid, 12345)
    self.assertEqual(len(Dataset.query.first().articles[0].compounds), 1)
    self.assertEqual(Dataset.query.first().articles[0].compounds[0].name, 'Methane')

def create_dataset():
    name = request.form['name']
    area = request.form['area']
    source = request.form['source']
    file_type = request.form['file_type']
    link = request.form['link']
    date_obtained = request.form['date_obtained']
    tags = request.form['tags']
    file = request.files['file']
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        filepath = os.path.join(datasets.config['UPLOAD_FOLDER'], filename)
        if os.path.exists(filepath):
            # A file with this name already exists: add a timestamp suffix.
            new_filename = '{}_{}.{}'.format(filename.rsplit('.', 1)[0],
                                             datetime.now().strftime('%H%M'),
                                             filename.rsplit('.', 1)[1])
            new_filepath = os.path.join(datasets.config['UPLOAD_FOLDER'],
                                        new_filename)
            file.save(os.path.join(datasets.config['UPLOAD_FOLDER'],
                                   new_filename))
            new_dataset = Dataset(name=name, area=area, source=source,
                                  file_type=file_type, link=link,
                                  date_obtained=date_obtained, tags=tags,
                                  file=new_filepath)
        else:
            file.save(os.path.join(datasets.config['UPLOAD_FOLDER'], filename))
            new_dataset = Dataset(name=name, area=area, source=source,
                                  file_type=file_type, link=link,
                                  date_obtained=date_obtained, tags=tags,
                                  file=filepath)
        db.session.add(new_dataset)
        db.session.commit()
    return jsonify({'status': 'success'}), 200

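# create_dataset() above calls an allowed_file() helper that is not shown.
# A minimal sketch of such a helper under the assumption that it simply
# whitelists extensions; the ALLOWED_EXTENSIONS_SKETCH set is illustrative,
# not taken from the original code.
ALLOWED_EXTENSIONS_SKETCH = {"csv", "json", "xlsx", "zip"}


def allowed_file_sketch(filename):
    """Return True if the filename has a whitelisted extension."""
    return ("." in filename
            and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS_SKETCH)
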
def upload_file(self, file):
    """
    Accepts a FileStorage object.
    Saves the original uploaded file and a serialized DataFrame for this file.
    Creates records in the database.

    :param file: FileStorage
    :return: IDs of the created data set and file
    """
    # Get the checksum of the file
    hashed_file_name = f'{self.get_file_checksum(file)}'
    # Change the file name and save the file under this name
    file_extension = self.get_file_extension(file.filename)
    file_full_name = f'{hashed_file_name}.{file_extension}'
    file_path = os.path.join(self.files_dir, file_full_name)
    # Point to the existing file instead of uploading a new one
    if file_full_name in self.files:
        _file = File.query.filter(File.path == file_full_name).first()
        _dataset = Dataset.query.filter(Dataset.file_id == _file.id).first()
        logger.info('User {0} uploaded a file which already existed under id {1}'.format(
            self.user_id, _file.id))
        return 'Uploaded', _file.id, _dataset.id
    file.seek(0)
    file.save(file_path)
    # Serialize the uploaded file as a DataFrame (update when the DataFrame interface is ready)
    shape = self.serialize(file_full_name)
    # Get the attributes of the file
    file_attributes = self.get_attributes(file_path)
    file_attributes['name'] = file.filename
    file_attributes['rows'] = shape[0]
    file_attributes['cols'] = shape[1]
    # Save to the db (update when the dbm is ready)
    new_file = File(path=file_full_name, attributes=file_attributes)
    db.session.add(new_file)
    db.session.flush()
    new_dataset = Dataset(user_id=self.user_id, file_id=new_file.id)
    db.session.add(new_dataset)
    db.session.commit()
    logger.info('User {0} uploaded a new file {1}'.format(self.user_id, new_file.id))
    response = {
        'file': {
            'id': new_file.id,
            'name': new_file.attributes['name'],
            'size': new_file.attributes['size'],
            'rows': new_file.attributes['rows']
        },
        'dataset_id': new_dataset.id
    }
    return response

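# The uploader above relies on a get_file_checksum() helper that is not shown.
# A minimal sketch of what such a helper might look like, assuming the
# FileStorage stream should be hashed (MD5 here) in chunks and then rewound so
# the caller can still save the file; this is an illustration, not the
# project's actual implementation.
import hashlib


def get_file_checksum_sketch(file_storage, chunk_size=8192):
    """Return the hex digest of a werkzeug FileStorage stream."""
    digest = hashlib.md5()
    file_storage.seek(0)
    for chunk in iter(lambda: file_storage.read(chunk_size), b''):
        digest.update(chunk)
    file_storage.seek(0)  # rewind so a later file.save() still works
    return digest.hexdigest()
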
def register_face():
    form = RegistrationForm()
    if form.validate_on_submit():
        if form.images.data:
            print("##############################################################\n")
            print(form.images.data)
            images = []
            # random_hex = secrets.token_hex(8)
            name = form.name.data
            person = Person(name=name)
            db.session.add(person)
            db.session.commit()
            id = Person.query.filter_by(name=name).first().id
            has_at_least_one_image_with_single_face = False
            for image in form.images.data:
                # TODO: make sure there is only one face in the image (if there are
                # two persons in the image and the second one tries to recognize
                # himself, then if the first person's id folder comes before the
                # second one's, the second person will be recognized as the first,
                # because the photo sits in the first person's id folder).
                face_image = faces.hasSingleFace(image)
                if face_image is not None:
                    has_at_least_one_image_with_single_face = True
                    image_fn, image_path = save_image_to_dataset(
                        dir_name=str(id),
                        form_image_name=image.filename,
                        to_be_saved_image=face_image)
                    dataset = Dataset(image_file=image_fn, author=person)
                    db.session.add(dataset)
                    print(image_path)
                    images.append(image_fn)
            if has_at_least_one_image_with_single_face is True:
                db.session.commit()
                faces.make_new_face_encodings()
                flash(f'Congratulations! Successfully registered the face as {form.name.data}. Try recognizing {form.name.data}.', 'success')
                return redirect(url_for('recognize_faces', title='Recognize Faces'))
            else:
                flash(f'{form.name.data} not registered as there was no face in the image. Try providing different images.', 'danger')
                return render_template('register_face.html',
                                       title="Register Face",
                                       selectedListElement="registerFace",
                                       form=form)
    return render_template('register_face.html',
                           title="Register Face",
                           selectedListElement="registerFace",
                           form=form)

def _seed_test_datasets_db(app):
    """
    Seeds a set of test datasets populated from a static csv file
    """
    from app import db
    from app.models import User, Dataset, DatasetStats

    dataset_csvfile = os.path.join(app.root_path, "../test/datasets.csv")
    with open(dataset_csvfile, 'r') as data_csv:
        csv_reader = csv.DictReader(data_csv)
        for row in csv_reader:
            dataset = Dataset(
                dataset_id=row['dataset_id'],
                annex_uuid=row['annex_uuid'],
                description=row['description'],
                owner_id=row['owner_id'],
                download_path=row['download_path'],
                raw_data_url=row['raw_data_url'],
                name=row['name'],
                modality=row['modality'],
                version=row['version'],
                format=row['format'],
                category=row['category'],
                date_created=datetime.utcnow(),
                date_updated=datetime.utcnow(),
                is_private=row['is_private'] == 'True'
            )
            db.session.add(dataset)

    dataset_stats_csvfile = os.path.join(app.root_path, "../test/datasets_stats.csv")
    with open(dataset_stats_csvfile, 'r') as datastat_csv:
        csv_reader = csv.DictReader(datastat_csv)
        for row in csv_reader:
            dataset_stat = DatasetStats(
                dataset_id=row['dataset_id'],
                size=row['size'],
                files=row['files'],
                sources=row['sources'],
                num_subjects=row['num_subjects'],
                num_downloads=row['num_downloads'],
                num_likes=row['num_likes'],
                num_views=row['num_views'],
                date_updated=datetime.utcnow()
            )
            db.session.add(dataset_stat)

    db.session.commit()

def new_dataset():
    """ Creates a new mock dataset to test """
    dataset = Dataset(dataset_id="8de99b0e-5f94-11e9-9e05-52545e9add8e",
                      description="Human Brain phantom scans, Multiple MRI"
                                  " scans of a single human phantom over 11"
                                  " years, T1 weighted images and others on"
                                  " 13 scanner in 6 sites accross North America."
                                  " The data are available in minc format",
                      name="Multicenter Single Subject Human MRI Phantom",
                      version="1.0",
                      is_private=False,
                      fspath='./test/test_dataset')
    return dataset

def test_empty_dataset_model(self):
    """
    Create dataset for test_user with no data
    """
    # Get curator
    curator = Curator.query.filter_by(username='******').first()
    self.assertEqual(curator.username, 'test_user')
    # Create empty dataset and add
    ds = Dataset(curator=curator)
    db.session.add(ds)
    db.session.commit()
    # Tests
    self.assertEqual(Dataset.query.count(), 1)
    self.assertEqual(Dataset.query.first().curator_id, 2)
    self.assertEqual(len(Dataset.query.first().articles), 0)

def seed_test_db():
    from app import db
    from app.models import User, Dataset, DatasetStats

    # create an admin user (not useful now, but at least we will have a user)
    # import the current dataset information (to be replaced by a dynamic process)
    dataset_csvfile = os.path.join(app.root_path, "../test/datasets.csv")
    with open(dataset_csvfile, 'r') as data_csv:
        csv_reader = csv.DictReader(data_csv)
        for row in csv_reader:
            dataset = Dataset(dataset_id=row['dataset_id'],
                              annex_uuid=row['annex_uuid'],
                              description=row['description'],
                              owner_id=row['owner_id'],
                              download_path=row['download_path'],
                              raw_data_url=row['raw_data_url'],
                              name=row['name'],
                              modality=row['modality'],
                              version=row['version'],
                              format=row['format'],
                              category=row['category'],
                              date_created=datetime.utcnow(),
                              date_updated=datetime.utcnow(),
                              is_private=row['is_private'] == 'True')
            db.session.add(dataset)

    dataset_stats_csvfile = os.path.join(app.root_path, "../test/datasets_stats.csv")
    with open(dataset_stats_csvfile, 'r') as datastat_csv:
        csv_reader = csv.DictReader(datastat_csv)
        for row in csv_reader:
            dataset_stat = DatasetStats(dataset_id=row['dataset_id'],
                                        size=row['size'],
                                        files=row['files'],
                                        sources=row['sources'],
                                        num_subjects=row['num_subjects'],
                                        num_downloads=row['num_downloads'],
                                        num_likes=row['num_likes'],
                                        num_views=row['num_views'],
                                        date_updated=datetime.utcnow())
            db.session.add(dataset_stat)

    db.session.commit()

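# Illustration only: the seeders above expect test/datasets.csv with at least
# the columns read via row[...]. A hedged sketch that writes a matching
# one-row fixture; the placeholder values (and the output path) are
# hypothetical, not taken from the original project.
import csv

DATASET_COLUMNS = [
    "dataset_id", "annex_uuid", "description", "owner_id", "download_path",
    "raw_data_url", "name", "modality", "version", "format", "category",
    "is_private",
]


def write_dataset_fixture_sketch(path="datasets.csv"):
    with open(path, "w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=DATASET_COLUMNS)
        writer.writeheader()
        writer.writerow({col: "placeholder" for col in DATASET_COLUMNS})
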
def dataset():
    form = DatasetForm()
    form.datasets_choices()
    if request.method == 'POST' and form.validate_on_submit():
        logger.info(f'dataset: {form.dataset.data}, split: {form.split.data}, '
                    f'kfolds: {form.kfolds.data}')
        # parameters used when the dataset was created.
        details = form.dataset.data.split('_')
        dataset = Dataset(file=form.dataset.data,
                          window=details[1].split('w')[-1],
                          aggregation=details[2].split('t')[-1],
                          size=details[3].split('.csv')[0].split('s')[-1],
                          split=form.split.data,
                          kfolds=form.kfolds.data)
        db.session.add(dataset)
        db.session.commit()
        return redirect(url_for('setting.classifier'))
    return render_template('setting/dataset.html', form=form)

def search_dataset():
    qstring = request.args.get('query')
    results, total = Dataset.search(qstring)
    output = {}
    output["total"] = total
    output["results"] = []
    for result in results:
        values = {
            'name': result.name,
            'area': result.area,
            'source': result.source,
            'file_type': result.file_type,
            'link': result.link,
            'date_obtained': result.date_obtained,
            'tags': result.tags,
            'id': result.id,
        }
        output["results"].append(values)
    return jsonify(output), 200

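# Usage sketch for the search endpoint above, assuming it is registered under
# a URL such as '/datasets/search' on a Flask app importable as `app`; both
# the URL and the import path are assumptions, not taken from the snippet.
from app import app


def search_sketch(query="brain"):
    with app.test_client() as client:
        # 'query' matches request.args.get('query') in the view above.
        resp = client.get("/datasets/search", query_string={"query": query})
        data = resp.get_json()
        print(data["total"], "result(s)")
        return data
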
def insert_sample_datasets(self):
    with open(self.datasets_file, 'r') as dataset_file:
        reader = csv.reader(dataset_file)
        next(reader)  # skip the header row
        for row in reader:
            dataset = Dataset(id=row[0],
                              dataset_id=row[1],
                              owner_id=row[2],
                              name=row[3],
                              modality=row[4],
                              version=row[5],
                              format=row[6],
                              category=row[7],
                              date_created=datetime.now(),
                              date_updated=datetime.now(),
                              is_private=row[8] == 'True')
            db.session.add(dataset)
        db.session.commit()
        dataset_file.close()

def new_dataset():
    """ Creates a new mock dataset to test """
    dataset = Dataset(dataset_id="8de99b0e-5f94-11e9-9e05-52545e9add8e",
                      annex_uuid="4fd032a1-220a-404e-95ac-ccaa3f7efcb7",
                      description="Human Brain phantom scans, Multiple MRI"
                                  " scans of a single human phantom over 11"
                                  " years, T1 weighted images and others on"
                                  " 13 scanner in 6 sites accross North America."
                                  " The data are available in minc format",
                      owner_id=1,
                      download_path="multicenter-phantom",
                      raw_data_url="https://phantom-dev.loris.ca",
                      name="Multicenter Single Subject Human MRI Phantom",
                      modality="Imaging",
                      version="1.0",
                      format="minc",
                      category="Phantom",
                      is_private=False)
    return dataset

def push_docker_img(self, dir_name, current_user_id, name, report):
    client = docker.from_env()
    current_user_obj = User.query.get(current_user_id)
    image_name = current_user_obj.username + '-' + name
    repo_name = os.environ.get('DOCKER_REPO') + '/'
    print(client.images.push(repository=repo_name + image_name), file=sys.stderr)

    ########## UPDATING DB ##########
    # add dataset to database
    new_dataset = Dataset(url="https://hub.docker.com/raas/" + repo_name + image_name + "/",
                          author=current_user_obj,
                          name=name,
                          report=report)
    db.session.add(new_dataset)
    db.session.commit()

    ########## CLEANING UP ##########
    self.clean_up_datasets(dir_name)
    print("Returning")

def new_dataset():
    if request.method == 'POST':
        print(request.form)
        # if request.form['dataset_uploaded']:
        #     flash(f'Your dataset has been created!', 'success')
        #     return redirect(url_for('datasets.new_dataset'))
        dataset = Dataset(title=request.form['title'],
                          abstract=request.form['abstract'],
                          category=request.form['category'])
        db.session.add(dataset)
        db.session.commit()
        return json.dumps({"dataset_id": dataset.id})
    form = DatasetForm()
    if form.validate_on_submit():
        print(f'datasets data: {form.datasets}')
        # for files in form.datasets.getlist():
        #     print(files)
    return render_template('create_dataset.html', title='New Dataset',
                           form=form, legend='New Dataset')

def run_command():
    atlas.dbInit("mysql+pymysql", "jvansan", "", "127.0.0.1", "np_atlas_2018_07")
    sess = atlas.startSession()
    curator = Curator.query.filter_by(username="******").first()
    articles = get_first_N_article_with_compounds(sess, 100)
    ds_articles = []
    for art_compound in articles:
        a = art_compound[0]
        compounds = []
        for c in art_compound[1]:
            org = get_compound_origin(c, sess)
            org_string = org.genus.name + " " + org.species
            compounds.append(Compound(name=c.names[0].name,
                                      smiles=c.smiles,
                                      source_organism=org_string))
        # Make sure to sort the names of the compounds before insertion
        compounds.sort(key=lambda x: x.name)
        art = Article(pmid=a.pmid, journal=a.journal.title, year=a.year,
                      volume=a.volume, issue=a.issue, pages=a.pages,
                      authors=a.authors, doi=a.doi, title=a.title,
                      abstract=a.abstract, num_compounds=len(compounds),
                      compounds=compounds)
        ds_articles.append(art)
    ds = Dataset(curator=curator, articles=ds_articles)
    db.session.add(ds)
    db.session.commit()

def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db
    from app.models import ArkId
    from app.models import Dataset as DBDataset
    from app.models import DatasetAncestry as DBDatasetAncestry
    from sqlalchemy import exc
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json
    from pathlib import Path
    import git

    datasetsdir = Path(app.config['DATA_PATH']) / 'conp-dataset'
    datasetsdir.mkdir(parents=True, exist_ok=True)

    # Initialize the git repository object
    try:
        repo = git.Repo(datasetsdir)
    except git.exc.InvalidGitRepositoryError:
        repo = git.Repo.clone_from('https://github.com/CONP-PCNO/conp-dataset',
                                   datasetsdir, branch='master')

    # Update to latest commit
    origin = repo.remotes.origin
    origin.pull('master')
    repo.submodule_update(recursive=False, keep_going=True)

    d = DataladDataset(path=datasetsdir)
    if not d.is_installed():
        api.clone(source='https://github.com/CONP-PCNO/conp-dataset',
                  path=datasetsdir)
        d = DataladDataset(path=datasetsdir)

    try:
        d.install(path='', recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO ] conp-dataset update complete')
    print('[INFO ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(source=ds['gitmodule_url'], path=ds['path'])
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print("[ERROR ] An exception occurred in datalad install for " + str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        # The following relates to the DATS.json files
        # of the projects directory in the conp-dataset repo.
        # Skip directories that aren't projects.
        patterns = [app.config['DATA_PATH'] + '/conp-dataset/projects/*']
        if not any(fnmatch.fnmatch(ds['path'], pattern) for pattern in patterns):
            continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file
        if descriptor == '':
            print("\033[91m")
            print('[ERROR ] DATS.json file can`t be found in ' + ds['path'] + ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(dataset_id=ds['gitmodule_name']).first()

        # pull the timestamp of the first commit in the git log for the dataset create date
        createDate = datetime.utcnow()
        try:
            createTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct --reverse | head -1".format(ds['path'])).read()
            createDate = datetime.fromtimestamp(int(createTimeStamp))
        except Exception:
            print("[ERROR ] Create Date couldn't be read.")

        firstMergeDate = datetime.utcnow()
        try:
            firstMergeTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct --reverse {} | head -1".format(
                    app.config['DATA_PATH'] + "/conp-dataset", ds['path'])).read()
            firstMergeDate = datetime.fromtimestamp(int(firstMergeTimeStamp))
        except Exception:
            print("[ERROR ] First merge date of the submodule dataset could not be read.")

        # last commit in the git log for the dataset update date
        updateDate = datetime.utcnow()
        try:
            createTimeStamp = os.popen(
                "git -C {} log --pretty=format:%ct | head -1".format(ds['path'])).read()
            updateDate = datetime.fromtimestamp(int(createTimeStamp))
        except Exception:
            print("[ERROR ] Update Date couldn't be read.")

        # get the remote URL
        remoteUrl = None
        try:
            remoteUrl = os.popen(
                "git -C {} config --get remote.origin.url".format(ds['path'])).read()
        except Exception:
            print("[ERROR ] Remote URL couldn't be read.")

        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = createDate
            dataset.date_added_to_portal = firstMergeDate

        if dataset.date_created != createDate:
            dataset.date_created = createDate

        # check for dataset ancestry
        extraprops = dats.get('extraProperties', [])
        for prop in extraprops:
            if prop.get('category') == 'parent_dataset_id':
                for x in prop.get('values', []):
                    if x.get('value', None) is None:
                        continue
                    datasetAncestry = DBDatasetAncestry()
                    datasetAncestry.id = str(uuid.uuid4())
                    datasetAncestry.parent_dataset_id = 'projects/' + x.get('value', None)
                    datasetAncestry.child_dataset_id = dataset.dataset_id
                    try:
                        db.session.merge(datasetAncestry)
                        db.session.commit()
                    except exc.IntegrityError:
                        # we already have a record of this ancestry
                        db.session.rollback()

        if not dataset.date_added_to_portal:
            dataset.date_added_to_portal = firstMergeDate

        dataset.date_updated = updateDate
        dataset.fspath = ds['path']
        dataset.remoteUrl = remoteUrl
        dataset.description = dats.get('description', 'No description in DATS.json')
        dataset.name = dats.get('title', os.path.basename(dataset.dataset_id))

        db.session.merge(dataset)
        db.session.commit()

        # if the dataset does not have an ARK identifier yet, generate it
        dataset_with_ark_id_list = [
            row[0] for row in db.session.query(ArkId.dataset_id).all()
        ]
        if dataset.dataset_id not in dataset_with_ark_id_list:
            new_ark_id = ark_id_minter(app, 'dataset')
            save_ark_id_in_database(app, 'dataset', new_ark_id, dataset.dataset_id)

        print('[INFO ] ' + ds['gitmodule_name'] + ' updated.')

def update_face(person_id):
    form = UpdateDatasetForm()
    person = Person.query.get_or_404(person_id)
    datasets = Dataset.query.filter_by(author=person).all()
    print('###############################################################################')
    print(person)
    print(datasets)
    print('###############################################################################')
    if form.validate_on_submit():
        if form.images.data:
            print("##############################################################\n")
            print(form.images.data)
            images = []
            # random_hex = secrets.token_hex(8)
            has_at_least_one_image_with_single_face = False
            for image in form.images.data:
                # TODO: make sure there is only one face in the image (if there are
                # two persons in the image and the second one tries to recognize
                # himself, then if the first person's id folder comes before the
                # second one's, the second person will be recognized as the first,
                # because the photo sits in the first person's id folder).
                # see if there is any image at all
                if image.mimetype.find("image") == -1:
                    break
                face_image = faces.hasSingleFace(image)
                if face_image is not None:
                    has_at_least_one_image_with_single_face = True
                    image_fn, image_path = save_image_to_dataset(
                        dir_name=str(person_id),
                        form_image_name=image.filename,
                        to_be_saved_image=face_image)
                    dataset = Dataset(image_file=image_fn, author=person)
                    db.session.add(dataset)
                    print(image_path)
                    images.append(image_fn)
            name = form.name.data
            if name != person.name:
                person.name = name
            image_deleted_from_dataset = False
            if form.images_to_be_deleted.data:
                for image_id in form.images_to_be_deleted.data.split(";"):
                    dataset = Dataset.query.get(image_id)
                    path_to_image = ('./app/static/images/dataset/' +
                                     str(person_id) + '/' + dataset.image_file)
                    if os.path.exists(path_to_image):
                        image_deleted_from_dataset = True
                        os.remove(path_to_image)
                        db.session.delete(dataset)
            db.session.commit()
            # update dataset_faces.dat if either an image was deleted from the
            # dataset or a new image was added
            if (has_at_least_one_image_with_single_face is True
                    or image_deleted_from_dataset is True):
                faces.make_new_face_encodings()
                flash(f'Successfully updated {form.name.data}.', 'success')
                return redirect(url_for('face', person_id=person_id))
            else:
                flash(f'{form.name.data} not updated.', 'danger')
                return render_template('update_face.html', title=person.name,
                                       file_select=True,
                                       enableDeletePersonPhoto=True,
                                       datasets=datasets, form=form)
    elif request.method == 'GET':
        form.name.data = person.name
    return render_template('update_face.html', title=person.name,
                           file_select=True, enableDeletePersonPhoto=True,
                           datasets=datasets, form=form)

def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db, config
    from app.models import Dataset as DBDataset
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json

    datasetspath = app.config['DATA_PATH']

    d = DataladDataset(path=datasetspath + '/conp-dataset')
    if not d.is_installed():
        api.clone(source='https://github.com/CONP-PCNO/conp-dataset',
                  path=datasetspath + '/conp-dataset')
        d = DataladDataset(path=datasetspath + '/conp-dataset')
        d.install(path='', recursive=True)

    try:
        d.update(path='', merge=True, recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO ] conp-dataset update complete')
    print('[INFO ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(source=ds['gitmodule_url'], path=ds['path'])
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print("[ERROR ] An exception occurred in datalad install for " + str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file
        if descriptor == '':
            print("\033[91m")
            print('[ERROR ] DATS.json file can`t be found in ' + ds['path'] + ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(dataset_id=ds['gitmodule_name']).first()
        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = datetime.utcnow()

        dataset.date_updated = datetime.utcnow()
        dataset.fspath = ds['path']
        dataset.description = dats.get('description', 'No description in DATS.json')
        dataset.name = dats.get('title', os.path.basename(dataset.dataset_id))

        db.session.merge(dataset)
        db.session.commit()

        print('[INFO ] ' + ds['gitmodule_name'] + ' updated.')

def _update_datasets(app):
    """
    Updates from conp-datasets
    """
    from app import db, config
    from app.models import Dataset as DBDataset
    from datalad import api
    from datalad.api import Dataset as DataladDataset
    import fnmatch
    import json
    from pathlib import Path
    import git

    datasetsdir = Path(app.config['DATA_PATH']) / 'conp-dataset'
    datasetsdir.mkdir(parents=True, exist_ok=True)

    # Initialize the git repository object
    try:
        repo = git.Repo(datasetsdir)
    except git.exc.InvalidGitRepositoryError as e:
        repo = git.Repo.clone_from('https://github.com/CONP-PCNO/conp-dataset',
                                   datasetsdir, branch='master')

    # Update to latest commit
    origin = repo.remotes.origin
    origin.pull('master')
    repo.submodule_update(recursive=False, keep_going=True)

    d = DataladDataset(path=datasetsdir)
    if not d.is_installed():
        api.clone(source='https://github.com/CONP-PCNO/conp-dataset',
                  path=datasetsdir)
        d = DataladDataset(path=datasetsdir)

    try:
        d.install(path='', recursive=True)
    except Exception as e:
        print("\033[91m")
        print("[ERROR ] An exception occurred in datalad update.")
        print(e.args)
        print("\033[0m")
        return

    print('[INFO ] conp-dataset update complete')
    print('[INFO ] Updating subdatasets')

    for ds in d.subdatasets():
        print('[INFO ] Updating ' + ds['gitmodule_url'])
        subdataset = DataladDataset(path=ds['path'])
        if not subdataset.is_installed():
            try:
                api.clone(source=ds['gitmodule_url'], path=ds['path'])
                subdataset = DataladDataset(path=ds['path'])
                subdataset.install(path='')
            except Exception as e:
                print("\033[91m")
                print("[ERROR ] An exception occurred in datalad install for " + str(ds) + ".")
                print(e.args)
                print("\033[0m")
                continue

        dirs = os.listdir(ds['path'])
        descriptor = ''
        for file in dirs:
            if fnmatch.fnmatch(file.lower(), 'dats.json'):
                descriptor = file
        if descriptor == '':
            print("\033[91m")
            print('[ERROR ] DATS.json file can`t be found in ' + ds['path'] + ".")
            print("\033[0m")
            continue

        try:
            with open(os.path.join(ds['path'], descriptor), 'r') as f:
                dats = json.load(f)
        except Exception as e:
            print("\033[91m")
            print("[ERROR ] Descriptor file can't be read.")
            print(e.args)
            print("\033[0m")
            continue

        # use dats.json data to fill the datasets table
        # avoid duplication / REPLACE instead of insert
        dataset = DBDataset.query.filter_by(dataset_id=ds['gitmodule_name']).first()
        if dataset is None:
            dataset = DBDataset()
            dataset.dataset_id = ds['gitmodule_name']
            dataset.date_created = datetime.utcnow()

        dataset.date_updated = datetime.utcnow()
        dataset.fspath = ds['path']
        dataset.description = dats.get('description', 'No description in DATS.json')
        dataset.name = dats.get('title', os.path.basename(dataset.dataset_id))

        db.session.merge(dataset)
        db.session.commit()

        print('[INFO ] ' + ds['gitmodule_name'] + ' updated.')

from flask import abort, jsonify
from . import main
from app.models import Dataset
import json
import pydap.client

dataset = Dataset.objects(name='vic_conus_3km').first()
dataset.data = pydap.client.open_url(dataset.url)


@main.route("/")
def hello():
    return "Hello World"


@main.route("/<api_key>/<location>", methods=['GET'])
def opendap_json(api_key=None, location=None):
    try:
        loc = location.split(',')
    except:
        abort(404)
    try:
        lat = float(loc[0])
        lon = float(loc[1])
    except:
        abort(404)
    if lat and lon:
        location = json.loads(dataset.get_xy(lat=lat, lon=lon))
        return jsonify(location)
    else:
        return "error: provide latitude and longitude"

def create(request):
    if request.method == 'POST':
        dataset = Dataset()
        dataset.owner = request.user
        dataset.name = request.POST['name']
        dataset.number_of_labels = request.POST['number_of_labels']
        dataset.description = request.POST['description']
        if not dataset.privacy_validation(request.POST['privacy']):
            return render(request, '400.html', status=400)
        dataset_file = request.FILES['dataset']
        reader = csv.reader(dataset_file, encoding='utf-8')
        header_list = reader.next()
        label_name = request.POST.get('label_name', 'CLASS')
        append_label_column = request.POST.get('append_label_column', False)
        if not append_label_column:
            label_index = header_list.index(label_name)
            header_list.pop(label_index)
        header_list.append(label_name)
        dataset.header = csvlist_to_string(header_list).strip()
        dataset.save()
        samples_count = 0
        for row_list in reader:
            samples_count += 1
            if not append_label_column:
                label_string = row_list.pop(label_index)
            row = csvlist_to_string(row_list).strip()
            sample = Sample(dataset=dataset, data=row,
                            original_index=samples_count)
            sample.save()
            if not append_label_column and label_string:
                label = Label(owner=request.user, sample=sample,
                              label=label_string)
                label.save()
                sample.times_labeled = 1
                sample.save()
        dataset.number_of_samples = samples_count
        dataset.save()
        return HttpResponseRedirect(reverse('datasets_show', args=(dataset.id,)))

def load_dataset(directory=None):
    """Page to load new datasets from within HOST."""
    data_dir = os.path.join(current_app.config['ABS_PATH'],
                            'static/datasets/preloaded')
    # All raters
    all_raters = request.args.get('all_raters', 0, type=int)
    # Choices of directories to load in form
    dir_choices = [
        d for d in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, d))
    ]
    dir_choices.sort()
    info = {'directory': directory, 'new_imgs': 0}
    if directory is not None:
        # Save useful info for jinja template
        info['model'] = Dataset.query.filter_by(name=directory).first()
        # Check access before loading files
        if info['model']:
            info['access'] = current_user.has_access(info['model'])
            info['saved_imgs'] = info['model'].images.count()
        else:
            info['access'] = True
            info['saved_imgs'] = 0
        if info['access']:
            # Count the files in the directory
            all_files = []
            for root, _, files in os.walk(os.path.join(data_dir, directory)):
                all_files.extend([
                    os.path.join(root, f) for f in files
                    if not f.startswith('.')
                ])
            info['new_imgs'] = len(all_files) - info['saved_imgs']
    form = LoadDatasetForm()
    form.dir_name.choices = dir_choices
    # Form submission must be restricted by access in template
    if form.validate_on_submit():
        if info['model']:
            new_dataset = False
        else:
            # If dataset is not a Dataset model (does not exist), create it
            info['model'] = Dataset(name=form.dir_name.data,
                                    creator=current_user)
            db.session.add(info['model'])
            new_dataset = True
            flash(f"{info['model'].name} was created as an OPEN dataset", 'info')
        if len(all_files) > 10:
            current_user.launch_task('load_data',
                                     f"Loading {info['new_imgs']} new images "
                                     f"to {info['model'].name} dataset...",
                                     icon='load',
                                     alert_color='primary',
                                     files=all_files,
                                     dataset_name=info['model'].name,
                                     new_dataset=new_dataset,
                                     ignore_existing=True)
            db.session.commit()
        else:
            try:
                # Function returns number of uploaded images
                loaded_imgs = load_data(all_files,
                                        dataset=info['model'],
                                        host=True,
                                        new_dataset=new_dataset)
            except OrphanDatasetError:
                # If orphaned dataset, delete it
                # TODO this somehow throws an error; look into this
                # db.session.delete(info['model'])
                flash('No new files were successfully loaded '
                      'leaving the dataset empty', 'warning')
            else:
                if not new_dataset and loaded_imgs == 0:
                    flash('No new files were successfully loaded', 'warning')
                else:
                    flash(f'{loaded_imgs} file(s) successfully loaded!', 'success')
            finally:
                # Commit changes in database
                db.session.commit()
        return redirect(url_for('main.dashboard', all_raters=all_raters))
    # Form validation errors
    for _, error in form.errors.items():
        flash(error[0], 'danger')
    return render_template('data/load_dataset.html',
                           form=form,
                           title="Load Dataset",
                           dictionary=info,
                           all_raters=all_raters)

def upload_dataset():
    """Page to upload new dataset of MRI."""
    data_dir = os.path.join(current_app.config['ABS_PATH'],
                            'static/datasets/uploaded')
    # All raters
    all_raters = request.args.get('all_raters', 0, type=int)
    form = UploadDatasetForm()
    if form.validate_on_submit():
        files = request.files.getlist(form.dataset.name)
        savedir = os.path.join(data_dir, form.dataset_name.data)
        # If savedir does not exist, create it
        if not os.path.isdir(savedir):
            os.makedirs(savedir)
        # Form checks that dataset does not exist already
        # so there is no need to check here; just create it
        dataset = Dataset(name=form.dataset_name.data,
                          creator=current_user,
                          private=form.privacy.data)
        dataset.grant_access(current_user)
        db.session.add(dataset)
        privacy = 'a PRIVATE' if dataset.private else 'an OPEN'
        flash(f"{dataset.name} was created as {privacy} dataset", 'info')
        if len(files) > 10:
            # Redis can't handle FileStorage
            # First upload all files
            files_uploaded = upload_data(files, savedir)
            current_user.launch_task('load_data',
                                     f'Uploading {len(files)} new images '
                                     f'to {dataset.name} dataset...',
                                     icon='upload',
                                     alert_color='primary',
                                     files=files_uploaded,
                                     dataset_name=dataset.name,
                                     new_dataset=True)
            db.session.commit()
        else:
            try:
                # Function returns number of uploaded images
                loaded_imgs = load_data(files, dataset, savedir=savedir)
            except OrphanDatasetError:
                # If orphaned dataset, delete it
                db.session.delete(dataset)
            else:
                # If not, that means at least one image was uploaded;
                # flash success with number of uploads
                flash(f'{loaded_imgs} file(s) successfully loaded!', 'success')
            finally:
                # Commit changes in database
                db.session.commit()
        return redirect(url_for('main.dashboard', all_raters=all_raters))
    for _, error in form.errors.items():
        flash(error[0], 'danger')
    return render_template('data/upload_dataset.html',
                           form=form,
                           all_raters=all_raters,
                           title='Upload Dataset')

def upload_dataset():
    user = User.query.filter_by(UserAccountId=current_user.get_id()).first()
    form = DatasetForm()
    if form.submit.data and form.validate_on_submit():
        odesc = Dataset.query.order_by(Dataset.OfferingId.desc()).first()
        if not odesc:
            o_id = 1
        else:
            o_id = odesc.OfferingId + 1
        o_type = int(request.form['offering_type'])
        df_ext = form.dataset_upload.data.filename.rsplit('.', 1)[1]
        df_name = secure_filename(
            str(user.UserAccountId) + '-' + str(o_id) + '-df.' + df_ext)
        df_path = os.path.join(app.root_path, 'dataset-files', df_name)
        form.dataset_upload.data.save(
            os.path.join(app.root_path, 'dataset-files', df_name))
        if form.sample_upload.data:
            sf_ext = form.sample_upload.data.filename.rsplit('.', 1)[1]
            sf_name = secure_filename(
                str(user.UserAccountId) + '-' + str(o_id) + '-sf.' + sf_ext)
            sf_path = os.path.join(app.root_path, 'sample-files', sf_name)
            form.sample_upload.data.save(
                os.path.join(app.root_path, 'sample-files', sf_name))
        else:
            sf_name = None
            sf_path = None
        lf_ext = form.license_upload.data.filename.rsplit('.', 1)[1]
        lf_name = secure_filename(
            str(user.UserAccountId) + '-' + str(o_id) + '-lf.' + lf_ext)
        lf_path = os.path.join(app.root_path, 'license-files', lf_name)
        form.license_upload.data.save(
            os.path.join(app.root_path, 'license-files', lf_name))
        if o_type == 1:
            uf = 0
        elif o_type == 2:
            uf = int(form.update_frequency.data)
        dataset = Dataset(OfferingName=form.offering_name.data,
                          OfferingType=o_type,
                          Category=form.category.data,
                          Description=form.description.data,
                          Region=form.region.data,
                          DateCreated=form.date_created.data,
                          DateLastUpdated=form.last_updated.data,
                          UpdateFrequency=uf,
                          DataFileName=df_name,
                          DataFilePath=df_path,
                          SampleFileName=sf_name,
                          SampleFilePath=sf_path,
                          LicenseFileName=lf_name,
                          LicenseFilePath=lf_path,
                          Price=form.price.data,
                          owner=user)
        db.session.add(dataset)
        db.session.commit()
        # new_row = [form.offering_name.data, user.UserName, form.category.data,
        #            form.region.data, o_type, uf, form.description.data,
        #            form.price.data]
        # train_file = os.path.join(app.root_path, 'train.csv')
        # with open(train_file, 'a') as fd:
        #     writer = csv.writer(fd, lineterminator='\n')
        #     writer.writerow(new_row)
        return redirect(url_for('my_offerings'))
    if form.calc.data and form.offering_name.data and form.description.data and form.region.data:
        o_type = int(request.form['offering_type'])
        if o_type == 1:
            uf = 0
        elif o_type == 2:
            uf = int(form.update_frequency.data)
        data = {
            'name': [form.offering_name.data],
            'seller': [user.UserName],
            'category': [form.category.data],
            'region': [form.region.data],
            'offering_type': [o_type],
            'update_frequency': [uf],
            'description': [form.description.data],
            'price': [0]
        }
        df = pd.DataFrame(data)
        train_file = os.path.join(app.root_path, 'train.csv')
        sug_price = calc_price(df, train_file)
        form.suggested_price.data = 'Suggested Price: $' + str(round(sug_price, 2))
    elif form.calc.data:
        form.suggested_price.data = ''
        flash('Name, Region and Description are required for suggesting price!')
    return render_template('dataset-upload.html', title='Upload Data', form=form)
