def form_valid(self, form):
    """Persist the submitted user request, attach a freshly created
    Dataset named after the requester's email, then delegate to the
    parent class's success handling (redirect)."""
    saved_request = form.save()

    # Each request gets its own backing Dataset record.
    backing_dataset = Dataset()
    backing_dataset.project_name = "Request for " + saved_request.user_email_address
    backing_dataset.save()

    saved_request.dataset = backing_dataset
    saved_request.save()

    return super(UserDataRequestCreate, self).form_valid(form)
def dataset():
    """Render the dataset page and handle archive uploads.

    GET: show the upload form plus all existing datasets.
    POST: validate the form; for each uploaded 'archive' file, store it
    via the `archives` upload set and create a Dataset row, flashing
    success or error feedback. Rejects anything that is not a ZIP/RAR
    archive (enforced by the form/upload-set validation).
    """
    form = UploadForm()
    user_id = session.get('id')
    user = User.query.filter_by(id=user_id).first()
    if request.method == 'POST':
        if form.validate_on_submit() and 'archive' in request.files:
            for archived_file in request.files.getlist('archive'):
                filename = archives.save(archived_file)
                # Log through logging only (debug print() calls removed);
                # pass the filename as an argument, never as the format
                # string itself.
                logging.info('Saved uploaded archive: %s', filename)
                dataset = Dataset(
                    name=form.name.data,
                    description=form.description.data,
                    filename=filename,
                    user_id=user_id,
                )
                try:
                    db.session.add(dataset)
                    db.session.commit()
                    flash('Dataset uploaded successfully!', 'success')
                except Exception:
                    # Best-effort per-file semantics are kept, but the
                    # failure is logged with a stack trace instead of
                    # being printed to stdout.
                    logging.exception('Failed to persist dataset %s', filename)
                    db.session.rollback()
                    flash('Error Uploading the file.', 'error')
        else:
            flash('Invalid File, ZIP and RAR Files only!', 'error')
    files = Dataset.query.all()
    return render_template('dataset/dataset.html',
                           files=files,
                           archives=archives,
                           form=form,
                           user=user)
def examples_dataset(request):
    """List the example datasets (owned by the demo account, pk=17) and
    let the current user copy one into their own account.

    GET with ?description=<pk>: return only that dataset's description
    text (plain HttpResponse, presumably for an AJAX preview — confirm
    against the template).
    POST with id=<pk>: duplicate the chosen example dataset for
    request.user unless they already own a dataset with the same title,
    in which case an error message is rendered instead.
    """
    datasets = Dataset.objects.filter(user=User.objects.get(pk=17))
    context = {'datasets': datasets}
    if request.GET.get('description'):
        return HttpResponse(
            datasets.get(pk=request.GET['description']).description)
    if request.method == "POST":
        dataset = Dataset.objects.get(pk=request.POST['id'])
        # .exists() avoids materializing the queryset just for a truth test.
        if Dataset.objects.filter(user=request.user,
                                  title=dataset.title).exists():
            context[
                'error_message'] = 'dataset with title {} already exists!'.format(
                    dataset.title)
            return render(request, 'petpen/example_dataset.html', context)

        def _copy_field_file(field_file):
            # Wrap the source FieldFile in a fresh File using only the
            # basename, so Django saves a new copy under the current
            # user's upload path instead of referencing the original.
            return File(field_file, op.split(field_file.name)[1])

        newDataset = Dataset(
            title=dataset.title,
            user=request.user,
            training_input_file=_copy_field_file(dataset.training_input_file),
            training_output_file=_copy_field_file(dataset.training_output_file),
            testing_input_file=_copy_field_file(dataset.testing_input_file),
            testing_output_file=_copy_field_file(dataset.testing_output_file),
            train_input_size=dataset.train_input_size,
            test_input_size=dataset.test_input_size,
            train_output_size=dataset.train_output_size,
            test_output_size=dataset.test_output_size,
            train_samples=dataset.train_samples,
            test_samples=dataset.test_samples,
            input_shape=dataset.input_shape,
            output_shape=dataset.output_shape,
            description=dataset.description,
            filetype=dataset.filetype,
            is_image=dataset.is_image)
        newDataset.save()
        context[
            'info'] = 'The example dataset {} is copied into your account.'.format(
                dataset.title)
    return render(request, 'petpen/example_dataset.html', context)
def __init__(self, args):
    """Build the train/test datasets, target label names, and the text
    vectorizer selected by the parsed command-line arguments."""
    self._training_set = Dataset(args.training_set, args)
    self._test_set = Dataset(args.test_set, args)

    # Class labels; 'Neutral' is only included when explicitly requested.
    self._target_names = ['Positive', 'Negative']
    if args.include_neutral:
        self._target_names.append('Neutral')

    # Translate the ngram-length option into the (min_n, max_n) range
    # expected by the vectorizers; unknown options leave it empty, as
    # the original if/elif chain did.
    ngram_length = {
        'unigram': (1, 1),
        'bigram': (2, 2),
        'trigram': (3, 3),
    }.get(args.ngram_length, ())

    # Pick the vectorizer class by name; if the option matches neither,
    # self._vectorizer is simply not set (original behavior).
    vectorizer_cls = {
        'tf-idf': TfidfVectorizer,
        'count': CountVectorizer,
    }.get(args.vectorizer)
    if vectorizer_cls is not None:
        self._vectorizer = vectorizer_cls(tokenizer=Lemmatizer(args),
                                          ngram_range=ngram_length)
def export_to_excel(dataset: Dataset) -> str:
    """Given a Dataset object, export its dataframe to an Excel file and
    return the generated file name.

    Returns:
        str: timestamped file name including the extension.
    """
    dataframe = dataset.get_dataframe()
    current_timestamp = timezone.now()
    file_name_formatter = '%Y_%m_%d_%I_%M_%S_%p'
    # Use .xlsx: the legacy xlwt engine required for .xls output was
    # deprecated in pandas 1.2 and removed in pandas 2.0, so writing a
    # '.xls' file raises ValueError on current pandas versions.
    file_name = f'{current_timestamp.strftime(file_name_formatter)}.xlsx'
    # generate excel file
    dataframe.to_excel(file_name)
    return file_name
def handle(self, *args, **options):
    """Export every visible, non-broken motion (C3D + MMM + annotations)
    from the motion database into a dated ZIP archive under DATA_PATH,
    then record the archive as a new Dataset row.

    Returns -1 if any file download fails; otherwise None.
    """
    import getpass

    # NOTE(review): the credential prompts were scrubbed ('******') in
    # the reviewed source; reconstructed as a visible username prompt
    # plus a hidden password prompt — confirm against version control.
    username = raw_input('MotionDB Username: ')
    password = getpass.getpass('MotionDB Password: ')
    self.stdout.write('')

    # Configure Ice and connect to the database through the Glacier2 router.
    properties = Ice.createProperties(sys.argv)
    properties.load(ICE_CLIENT_CONFIG_PATH)
    init_data = Ice.InitializationData()
    init_data.properties = properties
    ic = Ice.initialize(init_data)
    router = Glacier2.RouterPrx.checkedCast(ic.getDefaultRouter())
    session = router.createSession(username, password)
    db = MotionDatabase.MotionDatabaseSessionPrx.checkedCast(session)

    # Collect all matching C3D and MMM files.
    self.stdout.write('Collecting data from motion database ...')
    q = MotionFile.objects.filter(is_hidden=False,
                                  is_broken_reported=False,
                                  is_broken_confirmed=False)
    motion_ids = list(set([m.motion_db_id for m in q.all()]))
    all_c3d_files = []
    all_mmm_files = []
    all_annotations = []
    all_motion_ids = []
    all_database_entries = []
    for idx, motion_id in enumerate(motion_ids):
        self.stdout.write(' {}/{} ...'.format(idx + 1, len(motion_ids)),
                          ending=' ')
        self.stdout.flush()
        files = db.listFiles(motion_id)
        c3d_files = [f for f in files if f.fileType == 'Vicon C3D File']
        mmm_files = [f for f in files if f.fileType == 'Converted MMM Motion']
        for c3d_file in c3d_files:
            # Ensure that only visible data is exported.
            assert c3d_file.visibility == MotionDatabase.VisibilityLevel.Public

            # Fetch motion file from database; motions unknown to the
            # annotation tool are skipped.
            try:
                motion_file = MotionFile.objects.get(
                    motion_db_file_id=c3d_file.id)
            except MotionFile.DoesNotExist:
                continue
            assert motion_file.motion_db_id == motion_id

            # Skip broken motions.
            if motion_file.is_broken_reported or motion_file.is_broken_confirmed:
                continue

            # Find the matching MMM file for the given C3D file.
            mmm_file = None
            for f in mmm_files:
                if f.originatedFrom.id == c3d_file.id:
                    mmm_file = f
                    break
            assert mmm_file is not None

            # Get all annotations. We include data even if it isn't
            # annotated yet.
            annotations = Annotation.objects.filter(
                motion_file=motion_file).all()
            all_c3d_files.append(c3d_file)
            all_mmm_files.append(mmm_file)
            all_annotations.append(annotations)
            all_motion_ids.append(motion_id)
            all_database_entries.append(motion_file)
        self.stdout.write('done')
    n_motions = len(all_c3d_files)
    assert n_motions == len(all_mmm_files)
    assert n_motions == len(all_annotations)
    assert n_motions == len(all_motion_ids)
    assert n_motions == len(all_database_entries)
    self.stdout.write(
        'done, obtained {} motions and their annotations'.format(n_motions))
    self.stdout.write('')

    # Create temporary directory.
    tmp_path = mkdtemp()
    self.stdout.write('Downloading data to "{}" ...'.format(tmp_path))
    motion_entry_cache = {}
    nb_annotations = 0
    nb_motions = 0
    for idx, (database_entry, c3d_file, mmm_file, annotations,
              motion_id) in enumerate(
                  zip(all_database_entries, all_c3d_files, all_mmm_files,
                      all_annotations, all_motion_ids)):
        self.stdout.write(' {}/{}: ...'.format(idx + 1, n_motions),
                          ending=' ')
        self.stdout.flush()
        filename_prefix = '{0:05d}'.format(database_entry.id)
        filename_mmm = filename_prefix + '_mmm.xml'
        filename_c3d = filename_prefix + '_raw.c3d'
        filename_meta = filename_prefix + '_meta.json'
        filename_annotation = filename_prefix + '_annotations.json'

        # Download MMM.
        r = db.getFileReader(mmm_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_mmm), 'wb') as f:
            f.write(d)

        # Download C3D.
        r = db.getFileReader(c3d_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_c3d), 'wb') as f:
            f.write(d)

        # Retrieve motion information (cached per attached motion entry).
        if c3d_file.attachedToId in motion_entry_cache:
            motion_entry = motion_entry_cache[c3d_file.attachedToId]
        else:
            motion_entry = db.getMotion(c3d_file.attachedToId)
            motion_entry_cache[c3d_file.attachedToId] = motion_entry

        # Save annotations and extract their IDs for metadata.
        with open(os.path.join(tmp_path, filename_annotation), 'w') as f:
            json.dump([a.description for a in annotations], f)
        mat_annotation_ids = [a.id for a in annotations]

        # Save metadata.
        annotation_perplexities = [a.perplexity for a in annotations]
        assert len(annotation_perplexities) == len(annotations)
        with open(os.path.join(tmp_path, filename_meta), 'w') as f:
            data = {
                'motion_annotation_tool': {
                    'id': database_entry.id,
                    'annotation_ids': mat_annotation_ids,
                },
                'source': {
                    'institution': {
                        'name': motion_entry.associatedInstitution.name,
                        'identifier': motion_entry.associatedInstitution.acronym.lower(),
                    },
                    'database': {
                        'identifier': 'kit',
                        'motion_id': motion_id,
                        'motion_file_id': c3d_file.id,
                    },
                },
                'nb_annotations': len(annotations),
                'annotation_perplexities': annotation_perplexities,
            }
            if motion_entry.associatedInstitution.acronym.lower() == 'cmu':
                # Reference actual CMU database first and provide KIT
                # database as the mirror.
                data['source']['mirror_database'] = data['source']['database']
                motion_id, file_id = [
                    int(x)
                    for x in os.path.splitext(c3d_file.fileName)[0].split('_')
                ]
                data['source']['database'] = {
                    'identifier': 'cmu',
                    'motion_id': motion_id,
                    'motion_file_id': file_id,
                }
            json.dump(data, f)

        # Book-keeping.
        nb_annotations += len(annotations)
        nb_motions += 1
        self.stdout.write('done')
    self.stdout.write('done')
    self.stdout.write('')

    # Create ZIP archive.
    filename = time.strftime('%Y-%m-%d') + '.zip'
    self.stdout.write('Exporting ZIP archive "{}" ...'.format(filename),
                      ending=' ')
    self.stdout.flush()

    def callback_before(file):
        self.stdout.write(' processing file "{}" ...'.format(file),
                          ending=' ')
        self.stdout.flush()

    def callback_after(file):
        self.stdout.write('done')

    zipdir(tmp_path, os.path.join(DATA_PATH, filename),
           callback_before=callback_before, callback_after=callback_after)
    self.stdout.write('done')
    self.stdout.write('')

    # Create dataset entry in DB.
    dataset = Dataset()
    dataset.nb_annotations = nb_annotations
    dataset.nb_motions = nb_motions
    dataset.filename = filename
    dataset.filesize = os.path.getsize(os.path.join(DATA_PATH, filename))
    dataset.save()

    # Clean up tmp directory.
    self.stdout.write('Cleaning up temp directory "{}" ...'.format(tmp_path),
                      ending=' ')
    self.stdout.flush()
    shutil.rmtree(tmp_path)
    self.stdout.write('done')
    self.stdout.write('')
    self.stdout.write('All done, remember to collect the static files so that people can download the dataset!')
def handle(self, *args, **options):
    """Export every visible, non-broken motion (C3D + MMM + annotations)
    from the motion database into a dated ZIP archive under DATA_PATH,
    then record the archive as a new Dataset row.

    Returns -1 if any file download fails; otherwise None.
    """
    import getpass

    # NOTE(review): the credential prompts were scrubbed ('******') in
    # the reviewed source; reconstructed as a visible username prompt
    # plus a hidden password prompt — confirm against version control.
    username = raw_input('MotionDB Username: ')
    password = getpass.getpass('MotionDB Password: ')
    self.stdout.write('')

    # Configure Ice and connect to the database through the Glacier2 router.
    properties = Ice.createProperties(sys.argv)
    properties.load(ICE_CLIENT_CONFIG_PATH)
    init_data = Ice.InitializationData()
    init_data.properties = properties
    ic = Ice.initialize(init_data)
    router = Glacier2.RouterPrx.checkedCast(ic.getDefaultRouter())
    session = router.createSession(username, password)
    db = MotionDatabase.MotionDatabaseSessionPrx.checkedCast(session)

    # Collect all matching C3D and MMM files.
    self.stdout.write('Collecting data from motion database ...')
    q = MotionFile.objects.filter(is_hidden=False,
                                  is_broken_reported=False,
                                  is_broken_confirmed=False)
    motion_ids = list(set([m.motion_db_id for m in q.all()]))
    all_c3d_files = []
    all_mmm_files = []
    all_annotations = []
    all_motion_ids = []
    all_database_entries = []
    for idx, motion_id in enumerate(motion_ids):
        self.stdout.write(' {}/{} ...'.format(idx + 1, len(motion_ids)),
                          ending=' ')
        self.stdout.flush()
        files = db.listFiles(motion_id)
        c3d_files = [f for f in files if f.fileType == 'Vicon C3D File']
        mmm_files = [
            f for f in files if f.fileType == 'Converted MMM Motion'
        ]
        for c3d_file in c3d_files:
            # Ensure that only visible data is exported.
            assert c3d_file.visibility == MotionDatabase.VisibilityLevel.Public

            # Fetch motion file from database; motions unknown to the
            # annotation tool are skipped.
            try:
                motion_file = MotionFile.objects.get(
                    motion_db_file_id=c3d_file.id)
            except MotionFile.DoesNotExist:
                continue
            assert motion_file.motion_db_id == motion_id

            # Skip broken motions.
            if motion_file.is_broken_reported or motion_file.is_broken_confirmed:
                continue

            # Find the matching MMM file for the given C3D file.
            mmm_file = None
            for f in mmm_files:
                if f.originatedFrom.id == c3d_file.id:
                    mmm_file = f
                    break
            assert mmm_file is not None

            # Get all annotations. We include data even if it isn't
            # annotated yet.
            annotations = Annotation.objects.filter(
                motion_file=motion_file).all()
            all_c3d_files.append(c3d_file)
            all_mmm_files.append(mmm_file)
            all_annotations.append(annotations)
            all_motion_ids.append(motion_id)
            all_database_entries.append(motion_file)
        self.stdout.write('done')
    n_motions = len(all_c3d_files)
    assert n_motions == len(all_mmm_files)
    assert n_motions == len(all_annotations)
    assert n_motions == len(all_motion_ids)
    assert n_motions == len(all_database_entries)
    self.stdout.write(
        'done, obtained {} motions and their annotations'.format(n_motions))
    self.stdout.write('')

    # Create temporary directory.
    tmp_path = mkdtemp()
    self.stdout.write('Downloading data to "{}" ...'.format(tmp_path))
    motion_entry_cache = {}
    nb_annotations = 0
    nb_motions = 0
    for idx, (database_entry, c3d_file, mmm_file, annotations,
              motion_id) in enumerate(
                  zip(all_database_entries, all_c3d_files, all_mmm_files,
                      all_annotations, all_motion_ids)):
        self.stdout.write(' {}/{}: ...'.format(idx + 1, n_motions),
                          ending=' ')
        self.stdout.flush()
        filename_prefix = '{0:05d}'.format(database_entry.id)
        filename_mmm = filename_prefix + '_mmm.xml'
        filename_c3d = filename_prefix + '_raw.c3d'
        filename_meta = filename_prefix + '_meta.json'
        filename_annotation = filename_prefix + '_annotations.json'

        # Download MMM.
        r = db.getFileReader(mmm_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_mmm), 'wb') as f:
            f.write(d)

        # Download C3D.
        r = db.getFileReader(c3d_file.id)
        d = read_file(r)
        r.destroy()
        if d is None:
            return -1
        with open(os.path.join(tmp_path, filename_c3d), 'wb') as f:
            f.write(d)

        # Retrieve motion information (cached per attached motion entry).
        if c3d_file.attachedToId in motion_entry_cache:
            motion_entry = motion_entry_cache[c3d_file.attachedToId]
        else:
            motion_entry = db.getMotion(c3d_file.attachedToId)
            motion_entry_cache[c3d_file.attachedToId] = motion_entry

        # Save annotations and extract their IDs for metadata.
        with open(os.path.join(tmp_path, filename_annotation), 'w') as f:
            json.dump([a.description for a in annotations], f)
        mat_annotation_ids = [a.id for a in annotations]

        # Save metadata.
        annotation_perplexities = [a.perplexity for a in annotations]
        assert len(annotation_perplexities) == len(annotations)
        with open(os.path.join(tmp_path, filename_meta), 'w') as f:
            data = {
                'motion_annotation_tool': {
                    'id': database_entry.id,
                    'annotation_ids': mat_annotation_ids,
                },
                'source': {
                    'institution': {
                        'name': motion_entry.associatedInstitution.name,
                        'identifier': motion_entry.associatedInstitution.acronym.lower(),
                    },
                    'database': {
                        'identifier': 'kit',
                        'motion_id': motion_id,
                        'motion_file_id': c3d_file.id,
                    },
                },
                'nb_annotations': len(annotations),
                'annotation_perplexities': annotation_perplexities,
            }
            if motion_entry.associatedInstitution.acronym.lower() == 'cmu':
                # Reference actual CMU database first and provide KIT
                # database as the mirror.
                data['source']['mirror_database'] = data['source']['database']
                motion_id, file_id = [
                    int(x)
                    for x in os.path.splitext(c3d_file.fileName)[0].split('_')
                ]
                data['source']['database'] = {
                    'identifier': 'cmu',
                    'motion_id': motion_id,
                    'motion_file_id': file_id,
                }
            json.dump(data, f)

        # Book-keeping.
        nb_annotations += len(annotations)
        nb_motions += 1
        self.stdout.write('done')
    self.stdout.write('done')
    self.stdout.write('')

    # Create ZIP archive.
    filename = time.strftime('%Y-%m-%d') + '.zip'
    self.stdout.write('Exporting ZIP archive "{}" ...'.format(filename),
                      ending=' ')
    self.stdout.flush()

    def callback_before(file):
        self.stdout.write(' processing file "{}" ...'.format(file),
                          ending=' ')
        self.stdout.flush()

    def callback_after(file):
        self.stdout.write('done')

    zipdir(tmp_path, os.path.join(DATA_PATH, filename),
           callback_before=callback_before, callback_after=callback_after)
    self.stdout.write('done')
    self.stdout.write('')

    # Create dataset entry in DB.
    dataset = Dataset()
    dataset.nb_annotations = nb_annotations
    dataset.nb_motions = nb_motions
    dataset.filename = filename
    dataset.filesize = os.path.getsize(os.path.join(DATA_PATH, filename))
    dataset.save()

    # Clean up tmp directory.
    self.stdout.write(
        'Cleaning up temp directory "{}" ...'.format(tmp_path), ending=' ')
    self.stdout.flush()
    shutil.rmtree(tmp_path)
    self.stdout.write('done')
    self.stdout.write('')
    self.stdout.write(
        'All done, remember to collect the static files so that people can download the dataset!'
    )