Example #1
import os
import tempfile
from os.path import isfile

import requests

# msgt, msg, format_file_name, and make_sure_file_deleted are assumed to be
# helper functions defined elsewhere in this project.


def download_file(url_to_file):
    """Download a Dataverse file; return (file path, file extension)"""
    file_handle, filepath = tempfile.mkstemp()

    msgt('download file: %s' % url_to_file)

    r = requests.get(url_to_file, stream=True)

    if r.status_code != 200:
        msg('bad status: %s' % r.status_code)
        if isfile(filepath):
            make_sure_file_deleted(filepath)
        return None, None

    file_ext = None
    content_disposition = r.headers.get('content-disposition')
    fname = format_file_name(content_disposition) if content_disposition else None
    if fname:
        file_ext = fname.split('.')[-1].lower()
    msg('file_ext: %s' % file_ext)

    with os.fdopen(file_handle, 'wb') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                tmp.write(chunk)

    msg('File downloaded: %s' % filepath)
    return filepath, file_ext
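
A minimal usage sketch for the function above. The URL is hypothetical, and the msg helper is assumed to be importable from the surrounding project:

if __name__ == '__main__':
    # hypothetical Dataverse file access URL
    test_url = 'https://dataverse.example.edu/api/access/datafile/12345'
    filepath, file_ext = download_file(test_url)
    if filepath is None:
        msg('Download failed')
    else:
        msg('Saved to %s (extension: %s)' % (filepath, file_ext))
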
    def compare_dicts(self, section, new_dict, old_dict, **kwargs):
        """Compare two dicts, noting whether
        a key/value was Removed, Added, or Modified
        Note: Attempts to preserve key order--dict is usually an OrderedDict
        """
        # optional: get keys to skip
        skip_list = kwargs.get('skip_list', [])

        old_dict_keys = list(old_dict.keys())
        new_dict_keys = list(new_dict.keys())

        # ------------------------------
        # added attributes
        # ------------------------------
        added = [key for key in new_dict_keys if key not in old_dict_keys]
        for added_key in added:
            if added_key not in skip_list:
                self.record_diff_desc_added(\
                                section,
                                added_key,
                                new_dict[added_key])

        # ------------------------------
        # Removed attributes
        # ------------------------------
        removed = [key for key in old_dict_keys if key not in new_dict_keys]
        for removed_key in removed:
            if removed_key not in skip_list:
                self.record_diff_desc_removed(\
                                section,
                                removed_key,
                                old_dict[removed_key])

        # ------------------------------
        # Modified attributes
        # ------------------------------
        intersect_keys = [key for key in new_dict_keys\
                          if key not in removed and key not in added]
        mod_keys = [shared_key for shared_key in intersect_keys \
                    if old_dict[shared_key] != new_dict[shared_key]]

        for mod_key in mod_keys:
            msg('...> %s %s' % (mod_key, type(old_dict[mod_key])))
            # Is the value another dict?
            #
            if mod_key not in skip_list:
                self.compare_items(\
                            section,
                            mod_key,
                            new_dict[mod_key],
                            old_dict[mod_key])
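
A sketch of how compare_dicts might be called. Here 'checker' is a hypothetical instance of the class that defines it, and the section label and field names are made up for illustration:

from collections import OrderedDict

old_ds = OrderedDict([('title', 'My Study'), ('author', 'Smith')])
new_ds = OrderedDict([('title', 'My Study v2'), ('year', 2017)])

# checker is a hypothetical, already-constructed instance of the comparison class
checker.compare_dicts('citation', new_ds, old_ds, skip_list=['id'])
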
    def write_files_to_mongo(self, **kwargs):
        """Write the saved dataset files to Mongo"""
        client = MongoClient()
        db = client.dataverse_database
        collection = db.datasets

        # look at kwargs
        #
        dataset_start_id = kwargs.get('dataset_start_id', 0)
        delete_all = kwargs.get('delete_all', False)

        # If appropriate, Delete existing records
        #
        if delete_all:
            msgt('Deleting current records')
            result = collection.delete_many({})
            msg('result.deleted_count: %s' % result.deleted_count)
            return

        fnames = os.listdir(self.output_dir)
        fnames = [
            x for x in fnames if x.endswith('.json') and x.startswith('ds_')
        ]
        fnames.sort()

        start_time = int(time.time())  # epoch seconds

        cnt = 0
        for fname in fnames:
            cnt += 1
            ds_id = int(fname.split('.')[0].split('_')[1])

            msgt('(%d) process dataset %s (%s)' % (cnt, ds_id, fname))

            if ds_id < dataset_start_id:
                msg('skipping it')
                continue

            with open(join(self.output_dir, fname), 'r') as json_file:
                content = json_file.read()
            content = update_json_text(content)
            content_doc = json.loads(content, object_pairs_hook=OrderedDict)
            content_doc['_id'] = ds_id
            content_doc['dtype'] = 'dataset'

            # collection.save() is deprecated in newer pymongo versions;
            # replace_one() with upsert=True performs the equivalent upsert.
            collection.replace_one({'_id': ds_id}, content_doc, upsert=True)
            if cnt % 500 == 0:
                self.show_elapsed_time(start_time)
        self.show_elapsed_time(start_time)
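
A quick sketch of reading back one of the inserted documents with pymongo; the dataset id is hypothetical, a local MongoDB instance is assumed, and msg is the project's logging helper:

from pymongo import MongoClient

client = MongoClient()  # assumes MongoDB running on localhost:27017
collection = client.dataverse_database.datasets

doc = collection.find_one({'_id': 12345})  # hypothetical dataset id
if doc:
    msg('Found dataset %s with dtype: %s' % (doc['_id'], doc.get('dtype')))
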
    def run_comparison(self):
        """Compare the two JSON datasets"""
        msgt(self.run_comparison.__doc__)

        # Run a quick check to see if the dicts are the same.
        #
        if self.old_ds == self.new_ds:
            msg('No differences!')
            return

        new_files_list = self.new_ds.pop('files', [])
        old_files_list = self.old_ds.pop('files', [])

        self.compare_dicts(\
                    '',
                    self.new_ds,
                    self.old_ds)

        self.compare_file_lists(\
                    new_files_list,
                    old_files_list)
def download_file(url_to_file):
    """Download a Dataverse file and return the filename"""

    file_handle, filepath = tempfile.mkstemp()

    msgt('download file: %s' % url_to_file)

    r = requests.get(url_to_file, stream=True)

    if r.status_code != 200:
        msg('bad status: %s' % r.status_code)
        if isfile(filepath):
            make_sure_file_deleted(filepath)
        return None

    with os.fdopen(file_handle, 'wb') as tmp:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                tmp.write(chunk)

    msg('File downloaded: %s' % filepath)
    return filepath
    def show(self, show_section=True):
        """print info"""

        if show_section:
            msgt('%s: [%s] %s' % (self.section, self.attr_name, self.note))

        msg('attribute: %s' % self.attr_name)
        msg('\nnew: %s' % self.new_val)
        msg('\nold: %s' % self.old_val)
        dashes()
Example #12
    def get_count_broken_notifications():
        """
        Query each object type and make sure notifications aren't broken

        Example map
        { 'DvObject': [1],
          'Dataverse': [2],
          'Dataset': [14, 11], 'DatasetVersion': [13, 12, 7],
          'DataFile': [9]
         }

        """
        broken_cnt = 0
        user_ids = []
        for model_name, type_id_list in get_dv_object_to_object_id_map().items():

            #   Get a list of object ids for this model type
            #   that were not emailed -- i.e. they should show up
            #   on the notifications pages
            #
            msgt('check: %s %s' % (model_name, type_id_list))
            model_user_id_list = UserNotification.objects.select_related('user'\
                                        ).filter(\
                                        object_type__in=type_id_list,
                                        ).values_list('objectid', 'user__id')

            model_id_list = [x[0] for x in model_user_id_list]

            user_ids += [x[1] for x in model_user_id_list]

            msg('model_id_list len: %s' % len(model_id_list))
            if len(model_id_list) == 0:
                continue

            # Used for later "bad notice" counts
            notice_counter = Counter(model_id_list)
            msg('notice_counter len: %s' % len(notice_counter))

            unique_id_list = list(set(model_id_list))
            msg('unique_id_list len: %s' % len(unique_id_list))

            # NOTE: eval() maps the model name string to its model class;
            #   django.apps.apps.get_model() would be a safer alternative.
            model_class = eval(model_name)
            if model_name in ['DvObject', 'DatasetVersion', 'FileMetadata']:
                existing_ids = model_class.objects.filter(id__in=unique_id_list\
                                            ).values_list('id', flat=True\
                                            ).distinct()
            else:
                existing_ids = model_class.objects.select_related('dvobject'\
                                    ).filter(dvobject__id__in=unique_id_list\
                                    ).values_list('dvobject__id', flat=True\
                                    ).distinct()

            msg('existing_ids len: %s' % len(existing_ids))

            if len(unique_id_list) == len(existing_ids):
                # Looks good!
                continue

            missing_ids = list(set(unique_id_list) - set(existing_ids))
            for missing_id in missing_ids:
                broken_cnt += notice_counter.get(missing_id, 0)

        unique_user_ids = len(set(user_ids))

        return (broken_cnt, unique_user_ids)
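
A minimal invocation sketch, mirroring the call made in get_basic_stats further below (which suggests this is a staticmethod on a NotificationStats class):

broken_cnt, impacted_users = NotificationStats.get_count_broken_notifications()
msg('broken notifications: %s (impacting %s users)' % (broken_cnt, impacted_users))
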
    def make_json_files(self):
        """Write each Dataset (latest version) to a JSON file in OUTPUT_DIR"""

        # Set publication status
        #
        filters = {}
        if self.published_only:
            filters.update(query_helper.get_is_published_filter_param())

        # Query for dataset ids
        #
        ds_id_query = Dataset.objects.filter(**filters\
                            ).annotate(ds_id=F('dvobject__id')\
                            ).values_list('ds_id', flat=True\
                            ).order_by('ds_id')

        # Iterate through dataset ids
        #
        #start_time = datetime.now()
        start_time = int(time.time())  # epoch seconds

        cnt = 0
        no_versions_found_list = [45900]

        for ds_id in ds_id_query:
            cnt += 1
            msgt('(%d) Checking dataset id %s' % (cnt, ds_id))
            if ds_id < self.dataset_start_id:
                msg('skipping...(start at dataset id: %d)' %
                    self.dataset_start_id)
                continue

            # Create file name
            #
            fname = 'ds_%s.json' % (str(ds_id).zfill(8))
            full_fname = join(OUTPUT_DIR, fname)

            # Should we overwrite the existing file?
            #
            if isfile(full_fname) and not self.overwrite_existing_files:
                msg('skipping...file already exists')
                continue

            dataset_version = get_latest_dataset_version(ds_id)

            if dataset_version is None:
                msg("Could not find dataset_version!")
                no_versions_found_list.append(ds_id)
                continue

            dataset_as_json = DatasetSerializer(dataset_version).as_json()

            with open(full_fname, 'w') as json_file:
                json_file.write(json.dumps(dataset_as_json, indent=4))
            msg('File written: %s' % full_fname)

            if cnt % 500 == 0:
                self.show_elapsed_time(start_time)
            #if cnt > 10:
            #    self.show_elapsed_time(start_time)
            #    break

        self.show_elapsed_time(start_time)
        print 'no_versions_found_list: %s' % no_versions_found_list
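
A sketch of reading one of the generated files back; the output directory and dataset id below are hypothetical, while the zero-padded filename pattern comes from make_json_files above:

import json
from collections import OrderedDict
from os.path import join

output_dir = '/tmp/dataset_json'  # hypothetical stand-in for OUTPUT_DIR
fname = join(output_dir, 'ds_%s.json' % str(12345).zfill(8))

with open(fname, 'r') as json_file:
    ds_dict = json.loads(json_file.read(), object_pairs_hook=OrderedDict)

print 'dataset keys: %s' % ', '.join(ds_dict.keys())
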
Example #14
    def get_basic_stats():
        """Build a dict of NamedStat objects summarizing notification counts"""

        cnt_read_notifications = UserNotification.objects.filter(\
                                    readnotification=True,
                                    ).count()

        cnt_unread_notifications = UserNotification.objects.filter(\
                                    readnotification=False,
                                    ).count()

        cnt_undated_notifications = UserNotification.objects.filter(\
                                    senddate__isnull=True
                                    ).count()


        day_cnt_1 = 365
        day_cnt_1_date = datetime.now() - timedelta(days=day_cnt_1)

        day_cnt_2 = 180
        day_cnt_2_date = datetime.now() - timedelta(days=day_cnt_2)

        cnt_old_unread_notifications = UserNotification.objects.filter(\
                                    readnotification=False,
                                    senddate__lt=day_cnt_1_date
                                    ).count()

        cnt_old_unread_notifications2 = UserNotification.objects.filter(\
                                    readnotification=False,
                                    senddate__lt=day_cnt_2_date
                                    ).count()


        broken_cnt, impacted_users = NotificationStats.get_count_broken_notifications()
        msg('broken_cnt: %s' % broken_cnt)
        msg('impacted_users: %s' % impacted_users)


        notification_stats = dict(

            cnt_broken_notifications=NamedStat(\
                                'Broken Notifications / Impacted Users',
                                broken_cnt,
                                ('The notification refers to an object that'
                                 ' no longer exists.  These notifications should'
                                 ' be deleted from the database. (May be'
                                 ' responsible for some users who receive an'
                                 ' error when clicking on the notifications'
                                 ' tab.)'),
                                'view_broken_notifications',
                                **dict(stat2=impacted_users)),

            cnt_read_notifications=NamedStat(\
                                'Read Notifications',
                                cnt_read_notifications,
                                ('Count of read notifications'),
                                None),

            cnt_unread_notifications=NamedStat(\
                                'All Unread Notifications',
                                cnt_unread_notifications,
                                ('Count of unread notifications.'),
                                None),

            cnt_unread_old_notifications=NamedStat(\
                                'Unread: Older than %s Days' % day_cnt_1,
                                cnt_old_unread_notifications,
                                ('Count of'
                                 ' notifications <b>older'
                                 ' than %d days</b>') % day_cnt_1,
                                None),

            cnt_old_unread_notifications2=NamedStat(\
                                'Unread: Older than %s Days' % day_cnt_2,
                                cnt_old_unread_notifications2,
                                ('Count of'
                                 ' notifications <b>older'
                                 ' than %d days</b>') % day_cnt_2,
                                None),

            cnt_undated_notifications=NamedStat(\
                                'Undated Notifications',
                                cnt_undated_notifications,
                                ('Count of undated notifications'),
                                None),
            #cnt_harvested_zero=NamedStat(\
            #                    'Filesize 0 (Harvested)',
            #                    cnt_harvested_zero,
            #                    ('Count of harvested Datafiles displaying a'
            #                     ' size of 0 bytes')),
            )

        return notification_stats
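
A hypothetical caller sketch, assuming get_basic_stats is exposed on the same NotificationStats class as the broken-notification count above:

stats_dict = NotificationStats.get_basic_stats()
msg('stats gathered: %s' % ', '.join(stats_dict.keys()))
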
Example #18
    def get_data_rows(self, as_json=False, pretty_print=False):
        """
        Return information as JSON
            {
                "data" :
                    "total_row_count" : 117
                    "preview_row_count" : 50
                    "column_names" : ["Name", "Position", "Office"]
                    "rows" : [
                        [
                          "Tiger Nixon",
                          "System Architect",
                          "Edinburgh"
                        ],
                        [
                          "Garrett Winters",
                          "Accountant",
                          "Tokyo"
                        ]
                    ]
            }
        """
        if self.has_error():
            return None

        # Read the table
        try:
            if self.is_excel:
                msgt('Excel!')

                df = pd.read_excel(self.filepath)
            else:
                # read_table assumes tab-delimited data; in newer pandas,
                # pd.read_csv(self.filepath, sep='\t') is the equivalent call.
                df = pd.read_table(self.filepath)
        except Exception as ex_obj:
            msg(ex_obj)
            msgt('Failed to open file via pandas!')
            temp_file_helper.make_sure_file_deleted(self.filepath)
            if self.is_excel:
                self.add_error('Failed to open Excel file via pandas. [%s]' %
                               ex_obj)
            else:
                self.add_error(
                    '<b>Probably not a tabular file!</b> Failed to open file via pandas. [%s]'
                    % ex_obj)
            return None

        self.describe_as_html = df.describe().to_html()
        json_string = df.describe().to_json()
        self.describe_as_dict = json.loads(json_string,
                                           object_pairs_hook=OrderedDict)

        # Retrieve the columns
        self.column_names = df.columns.tolist()

        # Retrieve the rows
        self.data_rows = df[:self.num_preview_rows].values.tolist()


        # Format the response
        info_dict = OrderedDict()

        info_dict['total_row_count'] = len(df.index)
        info_dict['preview_row_count'] = len(self.data_rows)
        info_dict['column_names'] = self.column_names
        info_dict['rows'] = self.data_rows
        info_dict['describe_as_html'] = self.describe_as_html
        info_dict['describe_as_dict'] = self.describe_as_dict

        if as_json:
            if pretty_print:
                return json.dumps(info_dict, indent=4)
            return json.dumps(info_dict)

        return info_dict
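
A usage sketch for the preview method above; 'previewer' is a hypothetical, already-constructed instance of the class that defines get_data_rows (holding filepath, is_excel, num_preview_rows, etc.):

info = previewer.get_data_rows()
if info is not None:
    print 'total rows: %s' % info['total_row_count']
    print 'columns: %s' % info['column_names']

# or, as pretty-printed JSON
print previewer.get_data_rows(as_json=True, pretty_print=True)
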