def form_valid(self, form):
    """Persist the submitted user request, attach a freshly created
    Dataset named after the requester's email, then delegate to the
    parent class's success handling (redirect)."""
    saved_request = form.save()

    # Each request gets its own backing Dataset record.
    backing_dataset = Dataset()
    backing_dataset.project_name = "Request for " + saved_request.user_email_address
    backing_dataset.save()

    saved_request.dataset = backing_dataset
    saved_request.save()

    return super(UserDataRequestCreate, self).form_valid(form)
def dataset():
    """Render the dataset page and handle archive uploads.

    GET: show the upload form plus all existing datasets.
    POST: validate the form; for each uploaded 'archive' file, store it
    via the `archives` upload set and create a Dataset row, flashing
    success or error feedback. Rejects anything that is not a ZIP/RAR
    archive (enforced by the form/upload-set validation).
    """
    form = UploadForm()
    user_id = session.get('id')
    user = User.query.filter_by(id=user_id).first()
    if request.method == 'POST':
        if form.validate_on_submit() and 'archive' in request.files:
            for archived_file in request.files.getlist('archive'):
                filename = archives.save(archived_file)
                # Log through logging only (debug print() calls removed);
                # pass the filename as an argument, never as the format
                # string itself.
                logging.info('Saved uploaded archive: %s', filename)
                dataset = Dataset(
                    name=form.name.data,
                    description=form.description.data,
                    filename=filename,
                    user_id=user_id,
                )
                try:
                    db.session.add(dataset)
                    db.session.commit()
                    flash('Dataset uploaded successfully!', 'success')
                except Exception:
                    # Best-effort per-file semantics are kept, but the
                    # failure is logged with a stack trace instead of
                    # being printed to stdout.
                    logging.exception('Failed to persist dataset %s', filename)
                    db.session.rollback()
                    flash('Error Uploading the file.', 'error')
        else:
            flash('Invalid File, ZIP and RAR Files only!', 'error')
    files = Dataset.query.all()
    return render_template('dataset/dataset.html',
                           files=files,
                           archives=archives,
                           form=form,
                           user=user)
def examples_dataset(request):
    """List the example datasets (owned by the demo account, pk=17) and
    let the current user copy one into their own account.

    GET with ?description=<pk>: return only that dataset's description
    text (plain HttpResponse, presumably for an AJAX preview — confirm
    against the template).
    POST with id=<pk>: duplicate the chosen example dataset for
    request.user unless they already own a dataset with the same title,
    in which case an error message is rendered instead.
    """
    datasets = Dataset.objects.filter(user=User.objects.get(pk=17))
    context = {'datasets': datasets}
    if request.GET.get('description'):
        return HttpResponse(
            datasets.get(pk=request.GET['description']).description)
    if request.method == "POST":
        dataset = Dataset.objects.get(pk=request.POST['id'])
        # .exists() avoids materializing the queryset just for a truth test.
        if Dataset.objects.filter(user=request.user,
                                  title=dataset.title).exists():
            context[
                'error_message'] = 'dataset with title {} already exists!'.format(
                    dataset.title)
            return render(request, 'petpen/example_dataset.html', context)

        def _copy_field_file(field_file):
            # Wrap the source FieldFile in a fresh File using only the
            # basename, so Django saves a new copy under the current
            # user's upload path instead of referencing the original.
            return File(field_file, op.split(field_file.name)[1])

        newDataset = Dataset(
            title=dataset.title,
            user=request.user,
            training_input_file=_copy_field_file(dataset.training_input_file),
            training_output_file=_copy_field_file(dataset.training_output_file),
            testing_input_file=_copy_field_file(dataset.testing_input_file),
            testing_output_file=_copy_field_file(dataset.testing_output_file),
            train_input_size=dataset.train_input_size,
            test_input_size=dataset.test_input_size,
            train_output_size=dataset.train_output_size,
            test_output_size=dataset.test_output_size,
            train_samples=dataset.train_samples,
            test_samples=dataset.test_samples,
            input_shape=dataset.input_shape,
            output_shape=dataset.output_shape,
            description=dataset.description,
            filetype=dataset.filetype,
            is_image=dataset.is_image)
        newDataset.save()
        context[
            'info'] = 'The example dataset {} is copied into your account.'.format(
                dataset.title)
    return render(request, 'petpen/example_dataset.html', context)
def __init__(self, args):
    """Build the train/test datasets, target label names, and the text
    vectorizer selected by the parsed command-line arguments."""
    self._training_set = Dataset(args.training_set, args)
    self._test_set = Dataset(args.test_set, args)

    # Class labels; 'Neutral' is only included when explicitly requested.
    self._target_names = ['Positive', 'Negative']
    if args.include_neutral:
        self._target_names.append('Neutral')

    # Translate the ngram-length option into the (min_n, max_n) range
    # expected by the vectorizers; unknown options leave it empty, as
    # the original if/elif chain did.
    ngram_length = {
        'unigram': (1, 1),
        'bigram': (2, 2),
        'trigram': (3, 3),
    }.get(args.ngram_length, ())

    # Pick the vectorizer class by name; if the option matches neither,
    # self._vectorizer is simply not set (original behavior).
    vectorizer_cls = {
        'tf-idf': TfidfVectorizer,
        'count': CountVectorizer,
    }.get(args.vectorizer)
    if vectorizer_cls is not None:
        self._vectorizer = vectorizer_cls(tokenizer=Lemmatizer(args),
                                          ngram_range=ngram_length)
def export_to_excel(dataset: Dataset) -> str:
    """Given a Dataset object, export its dataframe to an Excel file and
    return the generated file name.

    Returns:
        str: timestamped file name including the extension.
    """
    dataframe = dataset.get_dataframe()
    current_timestamp = timezone.now()
    file_name_formatter = '%Y_%m_%d_%I_%M_%S_%p'
    # Use .xlsx: the legacy xlwt engine required for .xls output was
    # deprecated in pandas 1.2 and removed in pandas 2.0, so writing a
    # '.xls' file raises ValueError on current pandas versions.
    file_name = f'{current_timestamp.strftime(file_name_formatter)}.xlsx'
    # generate excel file
    dataframe.to_excel(file_name)
    return file_name
def handle(self, *args, **options):
    """Export every visible, non-broken motion (C3D + MMM + annotations)
    from the motion database into a dated ZIP archive under DATA_PATH,
    then record the archive as a new Dataset row.

    Returns -1 if any file download fails; otherwise None.
    """
    import getpass

    # NOTE(review): the credential prompts were scrubbed ('******') in
    # the reviewed source; reconstructed as a visible username prompt
    # plus a hidden password prompt — confirm against version control.
    username = raw_input('MotionDB Username: ')
    password = getpass.getpass('MotionDB Password: ')
    self.stdout.write('')

    # Configure Ice and connect to the database through the Glacier2 router.
    properties = Ice.createProperties(sys.argv)
    properties.load(ICE_CLIENT_CONFIG_PATH)
    init_data = Ice.InitializationData()
    init_data.properties = properties
    ic = Ice.initialize(init_data)
    router = Glacier2.RouterPrx.checkedCast(ic.getDefaultRouter())
    session = router.createSession(username, password)
    db = MotionDatabase.MotionDatabaseSessionPrx.checkedCast(session)

    # Collect all matching C3D and MMM files.
    self.stdout.write('Collecting data from motion database ...')
    q = MotionFile.objects.filter(is_hidden=False,
                                  is_broken_reported=False,
                                  is_broken_confirmed=False)
    motion_ids = list(set([m.motion_db_id for m in q.all()]))
    all_c3d_files = []
    all_mmm_files = []
    all_annotations = []
    all_motion_ids = []
    all_database_entries = []
    for idx, motion_id in enumerate(motion_ids):
        self.stdout.write(' {}/{} ...'.format(idx + 1, len(motion_ids)),
                          ending=' ')
        self.stdout.flush()
        files = db.listFiles(motion_id)
        c3d_files = [f for f in files if f.fileType == 'Vicon C3D File']
        mmm_files = [f for f in files if f.fileType == 'Converted MMM Motion']
        for c3d_file in c3d_files:
            # Ensure that only visible data is exported.
            assert c3d_file.visibility == MotionDatabase.VisibilityLevel.Public

            # Fetch motion file from database; motions unknown to the
            # annotation tool are skipped.
            try:
                motion_file = MotionFile.objects.get(
                    motion_db_file_id=c3d_file.id)
            except MotionFile.DoesNotExist:
                continue
            assert motion_file.motion_db_id == motion_id

            # Skip broken motions.
            if motion_file.is_broken_reported or motion_file.is_broken_confirmed:
                continue

            # Find the matching MMM file for the given C3D file.
            mmm_file = None
            for f in mmm_files:
                if f.originatedFrom.id == c3d_file.id:
                    mmm_file = f
                    break
            assert mmm_file is not None

            # Get all annotations. We include data even if it isn't
            # annotated yet.
            annotations = Annotation.objects.filter(
                motion_file=motion_file).all()
            all_c3d_files.append(c3d_file)
            all_mmm_files.append(mmm_file)
            all_annotations.append(annotations)
            all_motion_ids.append(motion_id)
            all_database_entries.append(motion_file)
        self.stdout.write('done')
    n_motions = len(all_c3d_files)
    assert n_motions == len(all_mmm_files)
    assert n_motions == len(all_annotations)
    assert n_motions == len(all_motion_ids)
    assert n_motions == len(all_database_entries)
    self.stdout.write(
        'done, obtained {} motions and their annotations'.format(n_motions))
    self.stdout.write('')

    # Create temporary directory.
    tmp_path = mkdtemp()
    self.stdout.write('Downloading data to "{}" ...'.format(tmp_path))
    motion_entry_cache = {}
    nb_annotations = 0
    nb_motions = 0
    for idx, (database_entry, c3d_file, mmm_file, annotations,
              motion_id) in enumerate(
                  zip(all_database_entries, all_c3d_files, all_mmm_files,
                      all_annotations, all_motion_ids)):
        self.stdout.write(' {}/{}: ...'.format(idx + 1, n_motions),
                          ending=' ')
        self.stdout.flush()
        filename_prefix = '{0:05d}'.format(database_entry.id)
        filename_mmm = filename_prefix + '_mmm.xml'
        filename_c3d = filename_prefix + '_raw.c3d'
        filename_meta = filename_prefix + '_meta.json'
        filename_annotation = filename_prefix + '_annotations.json'

        # Download MMM.
        r = db.getFileReader(mmm_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_mmm), 'wb') as f:
            f.write(d)

        # Download C3D.
        r = db.getFileReader(c3d_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_c3d), 'wb') as f:
            f.write(d)

        # Retrieve motion information (cached per attached motion entry).
        if c3d_file.attachedToId in motion_entry_cache:
            motion_entry = motion_entry_cache[c3d_file.attachedToId]
        else:
            motion_entry = db.getMotion(c3d_file.attachedToId)
            motion_entry_cache[c3d_file.attachedToId] = motion_entry

        # Save annotations and extract their IDs for metadata.
        with open(os.path.join(tmp_path, filename_annotation), 'w') as f:
            json.dump([a.description for a in annotations], f)
        mat_annotation_ids = [a.id for a in annotations]

        # Save metadata.
        annotation_perplexities = [a.perplexity for a in annotations]
        assert len(annotation_perplexities) == len(annotations)
        with open(os.path.join(tmp_path, filename_meta), 'w') as f:
            data = {
                'motion_annotation_tool': {
                    'id': database_entry.id,
                    'annotation_ids': mat_annotation_ids,
                },
                'source': {
                    'institution': {
                        'name': motion_entry.associatedInstitution.name,
                        'identifier': motion_entry.associatedInstitution.acronym.lower(),
                    },
                    'database': {
                        'identifier': 'kit',
                        'motion_id': motion_id,
                        'motion_file_id': c3d_file.id,
                    },
                },
                'nb_annotations': len(annotations),
                'annotation_perplexities': annotation_perplexities,
            }
            if motion_entry.associatedInstitution.acronym.lower() == 'cmu':
                # Reference actual CMU database first and provide KIT
                # database as the mirror.
                data['source']['mirror_database'] = data['source']['database']
                motion_id, file_id = [
                    int(x)
                    for x in os.path.splitext(c3d_file.fileName)[0].split('_')
                ]
                data['source']['database'] = {
                    'identifier': 'cmu',
                    'motion_id': motion_id,
                    'motion_file_id': file_id,
                }
            json.dump(data, f)

        # Book-keeping.
        nb_annotations += len(annotations)
        nb_motions += 1
        self.stdout.write('done')
    self.stdout.write('done')
    self.stdout.write('')

    # Create ZIP archive.
    filename = time.strftime('%Y-%m-%d') + '.zip'
    self.stdout.write('Exporting ZIP archive "{}" ...'.format(filename),
                      ending=' ')
    self.stdout.flush()

    def callback_before(file):
        self.stdout.write(' processing file "{}" ...'.format(file),
                          ending=' ')
        self.stdout.flush()

    def callback_after(file):
        self.stdout.write('done')

    zipdir(tmp_path, os.path.join(DATA_PATH, filename),
           callback_before=callback_before, callback_after=callback_after)
    self.stdout.write('done')
    self.stdout.write('')

    # Create dataset entry in DB.
    dataset = Dataset()
    dataset.nb_annotations = nb_annotations
    dataset.nb_motions = nb_motions
    dataset.filename = filename
    dataset.filesize = os.path.getsize(os.path.join(DATA_PATH, filename))
    dataset.save()

    # Clean up tmp directory.
    self.stdout.write('Cleaning up temp directory "{}" ...'.format(tmp_path),
                      ending=' ')
    self.stdout.flush()
    shutil.rmtree(tmp_path)
    self.stdout.write('done')
    self.stdout.write('')
    self.stdout.write('All done, remember to collect the static files so that people can download the dataset!')
def handle(self, *args, **options):
    """Export every visible, non-broken motion (C3D + MMM + annotations)
    from the motion database into a dated ZIP archive under DATA_PATH,
    then record the archive as a new Dataset row.

    Returns -1 if any file download fails; otherwise None.
    """
    import getpass

    # NOTE(review): the credential prompts were scrubbed ('******') in
    # the reviewed source; reconstructed as a visible username prompt
    # plus a hidden password prompt — confirm against version control.
    username = raw_input('MotionDB Username: ')
    password = getpass.getpass('MotionDB Password: ')
    self.stdout.write('')

    # Configure Ice and connect to the database through the Glacier2 router.
    properties = Ice.createProperties(sys.argv)
    properties.load(ICE_CLIENT_CONFIG_PATH)
    init_data = Ice.InitializationData()
    init_data.properties = properties
    ic = Ice.initialize(init_data)
    router = Glacier2.RouterPrx.checkedCast(ic.getDefaultRouter())
    session = router.createSession(username, password)
    db = MotionDatabase.MotionDatabaseSessionPrx.checkedCast(session)

    # Collect all matching C3D and MMM files.
    self.stdout.write('Collecting data from motion database ...')
    q = MotionFile.objects.filter(is_hidden=False,
                                  is_broken_reported=False,
                                  is_broken_confirmed=False)
    motion_ids = list(set([m.motion_db_id for m in q.all()]))
    all_c3d_files = []
    all_mmm_files = []
    all_annotations = []
    all_motion_ids = []
    all_database_entries = []
    for idx, motion_id in enumerate(motion_ids):
        self.stdout.write(' {}/{} ...'.format(idx + 1, len(motion_ids)),
                          ending=' ')
        self.stdout.flush()
        files = db.listFiles(motion_id)
        c3d_files = [f for f in files if f.fileType == 'Vicon C3D File']
        mmm_files = [
            f for f in files if f.fileType == 'Converted MMM Motion'
        ]
        for c3d_file in c3d_files:
            # Ensure that only visible data is exported.
            assert c3d_file.visibility == MotionDatabase.VisibilityLevel.Public

            # Fetch motion file from database; motions unknown to the
            # annotation tool are skipped.
            try:
                motion_file = MotionFile.objects.get(
                    motion_db_file_id=c3d_file.id)
            except MotionFile.DoesNotExist:
                continue
            assert motion_file.motion_db_id == motion_id

            # Skip broken motions.
            if motion_file.is_broken_reported or motion_file.is_broken_confirmed:
                continue

            # Find the matching MMM file for the given C3D file.
            mmm_file = None
            for f in mmm_files:
                if f.originatedFrom.id == c3d_file.id:
                    mmm_file = f
                    break
            assert mmm_file is not None

            # Get all annotations. We include data even if it isn't
            # annotated yet.
            annotations = Annotation.objects.filter(
                motion_file=motion_file).all()
            all_c3d_files.append(c3d_file)
            all_mmm_files.append(mmm_file)
            all_annotations.append(annotations)
            all_motion_ids.append(motion_id)
            all_database_entries.append(motion_file)
        self.stdout.write('done')
    n_motions = len(all_c3d_files)
    assert n_motions == len(all_mmm_files)
    assert n_motions == len(all_annotations)
    assert n_motions == len(all_motion_ids)
    assert n_motions == len(all_database_entries)
    self.stdout.write(
        'done, obtained {} motions and their annotations'.format(n_motions))
    self.stdout.write('')

    # Create temporary directory.
    tmp_path = mkdtemp()
    self.stdout.write('Downloading data to "{}" ...'.format(tmp_path))
    motion_entry_cache = {}
    nb_annotations = 0
    nb_motions = 0
    for idx, (database_entry, c3d_file, mmm_file, annotations,
              motion_id) in enumerate(
                  zip(all_database_entries, all_c3d_files, all_mmm_files,
                      all_annotations, all_motion_ids)):
        self.stdout.write(' {}/{}: ...'.format(idx + 1, n_motions),
                          ending=' ')
        self.stdout.flush()
        filename_prefix = '{0:05d}'.format(database_entry.id)
        filename_mmm = filename_prefix + '_mmm.xml'
        filename_c3d = filename_prefix + '_raw.c3d'
        filename_meta = filename_prefix + '_meta.json'
        filename_annotation = filename_prefix + '_annotations.json'

        # Download MMM.
        r = db.getFileReader(mmm_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_mmm), 'wb') as f:
            f.write(d)

        # Download C3D.
        r = db.getFileReader(c3d_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_c3d), 'wb') as f:
            f.write(d)

        # Retrieve motion information (cached per attached motion entry).
        if c3d_file.attachedToId in motion_entry_cache:
            motion_entry = motion_entry_cache[c3d_file.attachedToId]
        else:
            motion_entry = db.getMotion(c3d_file.attachedToId)
            motion_entry_cache[c3d_file.attachedToId] = motion_entry

        # Save annotations and extract their IDs for metadata.
        with open(os.path.join(tmp_path, filename_annotation), 'w') as f:
            json.dump([a.description for a in annotations], f)
        mat_annotation_ids = [a.id for a in annotations]

        # Save metadata.
        annotation_perplexities = [a.perplexity for a in annotations]
        assert len(annotation_perplexities) == len(annotations)
        with open(os.path.join(tmp_path, filename_meta), 'w') as f:
            data = {
                'motion_annotation_tool': {
                    'id': database_entry.id,
                    'annotation_ids': mat_annotation_ids,
                },
                'source': {
                    'institution': {
                        'name': motion_entry.associatedInstitution.name,
                        'identifier': motion_entry.associatedInstitution.acronym.lower(),
                    },
                    'database': {
                        'identifier': 'kit',
                        'motion_id': motion_id,
                        'motion_file_id': c3d_file.id,
                    },
                },
                'nb_annotations': len(annotations),
                'annotation_perplexities': annotation_perplexities,
            }
            if motion_entry.associatedInstitution.acronym.lower() == 'cmu':
                # Reference actual CMU database first and provide KIT
                # database as the mirror.
                data['source']['mirror_database'] = data['source']['database']
                motion_id, file_id = [
                    int(x)
                    for x in os.path.splitext(c3d_file.fileName)[0].split('_')
                ]
                data['source']['database'] = {
                    'identifier': 'cmu',
                    'motion_id': motion_id,
                    'motion_file_id': file_id,
                }
            json.dump(data, f)

        # Book-keeping.
        nb_annotations += len(annotations)
        nb_motions += 1
        self.stdout.write('done')
    self.stdout.write('done')
    self.stdout.write('')

    # Create ZIP archive.
    filename = time.strftime('%Y-%m-%d') + '.zip'
    self.stdout.write('Exporting ZIP archive "{}" ...'.format(filename),
                      ending=' ')
    self.stdout.flush()

    def callback_before(file):
        self.stdout.write(' processing file "{}" ...'.format(file),
                          ending=' ')
        self.stdout.flush()

    def callback_after(file):
        self.stdout.write('done')

    zipdir(tmp_path, os.path.join(DATA_PATH, filename),
           callback_before=callback_before, callback_after=callback_after)
    self.stdout.write('done')
    self.stdout.write('')

    # Create dataset entry in DB.
    dataset = Dataset()
    dataset.nb_annotations = nb_annotations
    dataset.nb_motions = nb_motions
    dataset.filename = filename
    dataset.filesize = os.path.getsize(os.path.join(DATA_PATH, filename))
    dataset.save()

    # Clean up tmp directory.
    self.stdout.write(
        'Cleaning up temp directory "{}" ...'.format(tmp_path), ending=' ')
    self.stdout.flush()
    shutil.rmtree(tmp_path)
    self.stdout.write('done')
    self.stdout.write('')
    self.stdout.write(
        'All done, remember to collect the static files so that people can download the dataset!'
    )