def test_excel_to_ndjson(self):
    out_f = self.output_folder / "test_excel_to_ndjson_out.ndjson"
    self.converter.convert_file(self.input_folder / 'sample.csv', 'ndjson', out_f)
    self.assertEqual(
        ndjson.load(out_f.open()),
        ndjson.load((self.input_folder / 'sample_null.ndjson').open()))
def get_vbb_data(centre):
    global stations
    global station_types
    g = Graph()

    with open('nodes.ndjson') as f:
        dataSta = ndjson.load(f)

    # convert to and from objects
    textSta = ndjson.dumps(dataSta)
    dataSta = ndjson.loads(textSta)

    for i in dataSta:
        # tupel = str(i['metadata']['x']) + "," + str(i['metadata']['y'])
        x = float(i['metadata']['longitude'])
        y = float(i['metadata']['latitude'])
        idSt = str(i['id'])
        g.add_node(idSt)
        stations[idSt] = (x, y)
        # g.add_node(tupel)

    with open('edges.ndjson') as f:
        dataDist = ndjson.load(f)

    # convert to and from objects
    textDist = ndjson.dumps(dataDist)
    dataDist = ndjson.loads(textDist)

    for i in dataDist:
        stationA = str(i['source'])
        stationB = str(i['target'])
        distance = int(i['metadata']['time'])
        line = i['metadata']['line']
        # regional trains; the original checked 'RB' twice, the second prefix is assumed to be 'RE'
        if line.startswith('RB') or line.startswith('RE'):
            station_types[stationA] = 1
            station_types[stationB] = 1
        elif line.startswith('U') or line.startswith('S'):
            if stationA in station_types:
                if station_types[stationA] > 1:
                    station_types[stationA] = 2
            else:
                station_types[stationA] = 2
            if stationB in station_types:
                if station_types[stationB] > 1:
                    station_types[stationB] = 2
            else:
                station_types[stationB] = 2
        else:
            if stationA in station_types:
                if station_types[stationA] > 2:
                    station_types[stationA] = 3
            else:
                station_types[stationA] = 3
            if stationB in station_types:
                if station_types[stationB] > 2:
                    station_types[stationB] = 3
            else:
                station_types[stationB] = 3
        g.add_edge(stationA, stationB, distance)

    return dijsktra(g, centre)
    # Station name of Dabendorf node: 900000245024
def test_csv_semicolon_to_ndjson(self):
    self.df_handler.convert_file(self.input_folder / 'invoice_semicolon_delimiter.csv',
                                 self.out_file_path,
                                 'csv',
                                 'ndjson',
                                 read_options={'sep': ';'})
    self.assertCountEqual(
        ndjson.load(self.out_file_path.open()),
        ndjson.load((self.input_folder / 'invoice.ndjson').open()))
def test_excel_one_sheet_to_ndjson(self):
    self.df_handler.convert_file(self.input_folder / 'invoice_multi_sheets.xlsx',
                                 self.out_file_path,
                                 'excel',
                                 'ndjson',
                                 read_options={'sheet_name': 'Sheet2'})
    self.assertCountEqual(
        ndjson.load(self.out_file_path.open()),
        ndjson.load(
            (self.input_folder / 'invoice_id_reversed.ndjson').open()))
def test_csv_to_ndjson_with_aito_schema(self):
    with (self.input_folder / 'invoice_aito_schema_altered.json').open() as f:
        schema_altered = json.load(f)
    self.df_handler.convert_file(self.input_folder / 'invoice.csv',
                                 self.out_file_path,
                                 'csv',
                                 'ndjson',
                                 use_table_schema=schema_altered)
    self.assertCountEqual(
        ndjson.load(self.out_file_path.open()),
        ndjson.load((self.input_folder / 'invoice_altered.ndjson').open()))
def loadEventJson(path, jsonlInput):
    '''This automatically supports files that have been compressed with `gzip`.'''
    if jsonlInput:
        import ndjson as json
    else:
        import json
    try:
        with gzip.open(path, FILE_MODE_READ_TEXT) as gid:
            all_recs = json.load(gid)
    except OSError:
        with open(path, FILE_MODE_READ) as gid:
            all_recs = json.load(gid)
    return all_recs
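# Hedged usage sketch of loadEventJson (not part of the original module): the file name
# and the two mode constants below are assumptions. The call works for both compressed
# and uncompressed input because the gzip attempt falls back to plain open() on OSError.
import gzip  # required by loadEventJson above

FILE_MODE_READ_TEXT = 'rt'  # assumed: text mode for gzip.open
FILE_MODE_READ = 'r'        # assumed: mode for the uncompressed fallback

events = loadEventJson('events.ndjson.gz', jsonlInput=True)
print('loaded %d event records' % len(events))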
def __init__(self, primary_files, min_count, supporting_files=None):
    self.negatives = []
    self.discards = []
    self.negpos = 0

    self.word2id = dict()
    self.id2word = dict()
    self.word_frequency = dict()

    self.token_count = 0
    self.max_num_words_file = 0
    self.primary_files = primary_files
    self.supporting_files = supporting_files
    self.file_paths = primary_files if supporting_files is None else primary_files + supporting_files
    self.data = None

    if self.ndJson:
        with open(self.primary_files) as f:
            self.data = ndjson.load(f)
        self.readWordsNdJson(min_count)
    else:
        self.readWords(min_count)
    self.initTableNegatives()
    self.initTableDiscards()
def get_segmented_reviews(retrievepath, savepath):
    '''This method takes the retrieve path to the source json file and dumps the data
    into another file at the save path, after doing sentence segmentation on the reviews.'''
    # Open the json file and read the reviews in.
    # The file is actually ndjson (separated by newlines, not commas).
    try:
        with open(retrievepath, encoding='latin-1') as f:
            datastore = ndjson.load(f)
    except IOError:
        print('An error occurred trying to read the file.')

    # Use sent_tokenize() to split a review text into a list of sentences.
    for review in datastore:
        review['text'] = sent_tokenize(review['text'])
        # number of sentences in each review text
        review['num_sentence'] = len(review['text'])

    # Save the segmented comments to the data folder for further analysis.
    try:
        with open(savepath, 'w+') as f:
            ndjson.dump(datastore, f)
    except IOError:
        print('An error occurred trying to save the file.')
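# Hedged usage sketch (the file names are assumptions, not from the original project):
# sent_tokenize comes from NLTK, and the input file is expected to hold one review
# object with a 'text' field per ndjson line.
import ndjson
from nltk.tokenize import sent_tokenize  # required by get_segmented_reviews above

get_segmented_reviews('data/reviews.ndjson', 'data/reviews_segmented.ndjson')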
def generate_class(filename):
    CLASS_NAME = filename.replace('.ndjson', '').replace('full_simplified_', '')
    print(f'Begin generating {CLASS_NAME} images...')
    try:
        os.mkdir(f'{OUTPUT_BASE_DIR}{CLASS_NAME}')
    except FileExistsError:
        pass  # Ignore if dir already exists

    with open(f'{INPUT_DIR}{filename}') as f:
        data = ndjson.load(f)
    print(f"Size of {CLASS_NAME}s set: {len(data)}")

    count = 0
    for sample in data:
        if not sample['recognized']:
            continue
        canvas = generate_image(sample)
        canvas = resize_image(canvas)
        save_image(canvas, CLASS_NAME, count)
        count += 1
        if count % (NUMBER_OF_IMAGES / 4) == 0:
            print(
                f"Generated {(count*100)/NUMBER_OF_IMAGES}% of {CLASS_NAME} images..."
            )
        if count == NUMBER_OF_IMAGES:
            break
    print(f'Finished generating {CLASS_NAME} images.')
    return
def process_kibana_object(obj_type, exportpath, indexpattern=None):
    print('# Processing kibana object: %s' % obj_type)
    if obj_type != 'index-pattern':
        src_file_name = '%s%s' % (EXPORT_FILES_PREFIX_KIBANA, obj_type)
    else:
        if indexpattern is None:
            for i in INDEX_PATTERNS_FILTER.split('|'):
                process_kibana_object(obj_type, exportpath, indexpattern=i)
            return
        else:
            src_file_name = '%s%s_%s' % (EXPORT_FILES_PREFIX_KIBANA, obj_type, indexpattern)

    src_file = os.path.join(exportpath, '%s.ndjson' % src_file_name)
    diff_file = os.path.join(exportpath, DIFF_PATH, '%s.json' % src_file_name)

    print('\tOpening %s: %s' % (obj_type, src_file))
    with open(src_file, 'r') as f:
        src_ndjson = ndjson.load(f)

    for s in src_ndjson:
        if obj_type == 'index-pattern':
            s['attributes']['fields'] = sorted(json.loads(s['attributes']['fields']), key=lambda x: x['name'])
        elif obj_type == 'search':
            s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'] = json.loads(s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'])
        elif obj_type == 'visualization':
            s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'] = json.loads(s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'])
            s['attributes']['visState'] = json.loads(s['attributes']['visState'])
        elif obj_type == 'dashboard':
            s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'] = json.loads(s['attributes']['kibanaSavedObjectMeta']['searchSourceJSON'])
            s['attributes']['optionsJSON'] = json.loads(s['attributes']['optionsJSON'])
            s['attributes']['panelsJSON'] = json.loads(s['attributes']['panelsJSON'])

    print('\tWriting output to: %s' % diff_file)
    with open(diff_file, 'w') as f:
        json.dump(src_ndjson, f, indent=4, sort_keys=True)
def parse_label(filename, path=RAW_DIR_NAME, decode=None):
    """
    Helper for parse_dataset: parses a single .ndjson file associated with the specified path

    @param filename (str): string specifying the path to the .ndjson file to parse
    @param decode (None or "jpg"): whether to decode sketches as images. By default,
        sketches are saved as ndjson files.
    @param path - str: folder where training examples will be stored.
    """
    list_ids = []
    label, _ = os.path.splitext(filename)
    full_filename = os.path.join(path, filename)
    with open(full_filename) as f:
        if decode == 'jpg':
            dir_name = os.path.join(path, '../img/' + label)
        else:
            dir_name = os.path.join(path, label)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        drawings = ndjson.load(f)
        for drawing in drawings:
            example_filename = save_training_example(drawing, dir_name, decode)
            list_ids.append(example_filename)
    return list_ids
def extract_scenes(paths, outdir):
    '''
    Just extract scenic comments and save them somewhere
    '''
    # iterate through files
    for path in tqdm(paths):
        # get file name
        fname = os.path.basename(path)
        outpath = os.path.join(outdir, fname)

        # open file
        with open(path) as f:
            session = ndjson.load(f)

        # process all docs in that file
        scenes = []
        for doc in session:
            # split texts and scenic comments
            scene, text = extract_scenic_comments(doc, tokentype='text')
            scenes.append(scene)

        # export (the output file must be opened for writing)
        try:
            with open(outpath, 'w') as f:
                ndjson.dump(scenes, f)
        except FileNotFoundError:
            pass
def infodynamics_plots(ntr_paths, model_dir):
    '''
    Make adaptline & regline for each result/window
    '''
    # for every path given
    for path in ntr_paths:
        # extract the window size from the file name
        w = re.findall(r'\d+', os.path.basename(path))
        assert len(w) == 1
        w = w[0]
        print('[info] plotting window {}'.format(w))

        # open infodynamics results
        with open(path) as f:
            ntr_df = pd.DataFrame(ndjson.load(f))

        try:
            plot_window(
                ntr_df=ntr_df,
                w=w,
                model_dir=model_dir
            )
        except np.linalg.LinAlgError:
            print('{} is a singular matrix'.format(w))
def get_label_vocab(train_data_path):
    with open(train_data_path) as f:
        d = ndjson.load(f)
    labels = list(set([i["label"] for i in d]))
    label_vocab = {label: i for i, label in enumerate(labels)}
    return label_vocab
def lookup_references(self):
    """Looks up references to a patient"""
    resource_paths = self.load_resources()
    self.references = {}
    for resource_path in resource_paths:
        with open(resource_path, 'r') as inf:
            resources = ndjson.load(inf)
        for resource in resources:
            if 'patient' in resource.keys():
                if resource['patient']['reference'].split('/')[1] == self.patient_id:
                    resource_type = resource['resourceType']
                    if resource_type not in self.references.keys():
                        self.references[resource_type] = 1
                    else:
                        self.references[resource_type] += 1
            elif 'subject' in resource.keys():
                if resource['subject']['reference'].split('/')[1] == self.patient_id:
                    resource_type = resource['resourceType']
                    if resource_type not in self.references.keys():
                        self.references[resource_type] = 1
                    else:
                        self.references[resource_type] += 1
def lookup_encounters(self):
    with open('./data/Encounter.ndjson', 'r') as inf:
        encounter_file = ndjson.load(inf)
    for i, encounter in enumerate(encounter_file):
        # print(encounter)
        patient_reference = encounter['subject']['reference'].split('/')[1]
        # print(patient_reference, patient_id)
        if patient_reference == self.patient_id:
            print("cool")
            print(encounter['resourceType'])
            patient = encounter['subject']['reference']
            practitioner = encounter['participant'][0]['individual']['reference']
            if 'Practitioner' not in self.references.keys():
                self.references['Practitioner'] = 1
            else:
                self.references['Practitioner'] += 1
            location = encounter['location'][0]['location']['reference']
            if 'Location' not in self.references.keys():
                self.references['Location'] = 1
            else:
                self.references['Location'] += 1
            organization = encounter['serviceProvider']['reference']
            if 'Organization' not in self.references.keys():
                self.references['Organization'] = 1
            else:
                self.references['Organization'] += 1
def main():
    '''
    Run preprocessing
    '''
    # initialize argparser with a desc
    ap = argparse.ArgumentParser(
        description="Parallelized maximal preprocessing using stanza")

    # input path
    ap.add_argument("-p", "--inpath", required=True,
                    help="path to ndjson with texts to process")

    # output path
    ap.add_argument("-o", "--outpath", required=True,
                    help="where results will be saved")

    # language of texts
    ap.add_argument("--lang", required=False, type=str, default='da',
                    help="two character ISO code of a desired language")

    # number of workers
    ap.add_argument("--jobs", required=False, type=int, default=4,
                    help="number of workers to split the job between.")

    # hotfix
    ap.add_argument("--bugstring", required=False, type=bool, default=False,
                    help="remove sequences of equal signs from documents?")

    # parse that
    args = vars(ap.parse_args())

    # run functions down the line
    print('[info] Importing {}'.format(args['inpath']))
    with open(args['inpath'], 'r') as f_in:
        texts = ndjson.load(f_in)

    print('[info] Clearing buggy strings.')
    # the flag is registered as --bugstring, so it is stored under that key
    if args['bugstring']:
        texts = _delete_many_equal_signs(texts)

    print('[info] Stanza starting {} jobs'.format(args['jobs']))
    dfs_out = stanza_multicore(texts=texts,
                               lang=args['lang'],
                               n_jobs_gpu=args['jobs'])

    print('[info] Saving results to {}'.format(args['outpath']))
    with open(args['outpath'], "w") as f_out:
        ndjson.dump(dfs_out, f_out)
def convert(compressedJSONFile, destDir=".", force=False, skipchecks=False):
    path = os.path.normpath(compressedJSONFile)
    fileName = path.split(os.sep)[-1]
    date = datetimeFromARDFilename(fileName)
    day = cd.dayFromDate(date)

    newFile = destDir + "/NPGEO-RKI-{}.csv".format(cd.dateStrYMDFromDay(day))
    redo = False
    yesterDayRows = -1
    allowedShrinkageDays = [33, 68]
    allowedSameDays = [33]
    allowedJumpDays = [46, 66]

    if not skipchecks:
        # check if a previous file exists and make sure the current file is not broken
        previousFile = destDir + "/NPGEO-RKI-{}.csv".format(cd.dateStrYMDFromDay(day - 1))
        if os.path.isfile(previousFile):
            yesterdayFrame = dt.fread(previousFile)
            yesterDayRows = yesterdayFrame.nrows
        else:
            print("No file for previous day {}".format(day - 1))

        if not force and os.path.isfile(newFile) and yesterDayRows >= 0:
            existingFrame = dt.fread(newFile)
            existingRows = existingFrame.nrows
            if existingRows < yesterDayRows:
                if not day in allowedShrinkageDays:
                    print("Existing .csv file for day {} contains fewer rows ({}) than the previous day's file ({}), redoing".format(day, existingRows, yesterDayRows))
                    redo = True
                else:
                    print("On day {} the number of rows was reduced to {} compared to yesterday's file ({})".format(day, existingRows, yesterDayRows))
            else:
                if existingRows == yesterDayRows:
                    if not day in allowedSameDays:
                        print("Existing .csv file for day {} contains the same number of rows ({}) as the previous day's file ({}), redoing".format(day, existingRows, yesterDayRows))
                        redo = True
                    else:
                        print("Existing .csv file for day {} contains the same number of rows ({}) as the previous day's file ({}) but we can't do anything about it".format(day, existingRows, yesterDayRows))
                elif (existingRows > yesterDayRows * 1.1) and (existingRows - yesterDayRows > 5000) and not day in allowedJumpDays:
                    print("Existing .csv file for day {} contains many more rows ({}) than the previous day's file ({}), redoing".format(day, existingRows, yesterDayRows))
                    redo = True
                print("Existing .csv file contains {} rows, {} more than yesterday".format(existingRows, existingRows - yesterDayRows))

    if force or redo or not os.path.isfile(newFile):
        print("Loading " + compressedJSONFile)
        #with bz2.open(compressedJSONFile, "rb") as f:
        with lzma.open(compressedJSONFile, "rb") as f:
            content = ndjson.load(f)
        frame = dt.Frame(content)
        if frame.nrows <= yesterDayRows and not day in allowedShrinkageDays:
            print("Rejecting '{}' because it contains fewer rows than yesterday's file".format(compressedJSONFile))
            return
        print("Saving " + newFile)
        frame.to_csv(newFile)
    else:
        print("Skipping '{}' because '{}' already exists".format(compressedJSONFile, newFile))
def load_ndjson(path: str) -> Union[List, Dict]:
    try:
        with open(path, "r") as read_file:
            data = ndjson.load(read_file)
    except ValueError as e:
        print("Invalid json: %s" % e)
        return None
    return data
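# Hedged usage sketch (the path below is hypothetical): ndjson.load returns a list with
# one parsed object per line, and load_ndjson returns None when a line is not valid JSON.
records = load_ndjson("data/records.ndjson")
if records is not None:
    print("loaded %d records" % len(records))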
def load_data(drawing_count):
    # load from file-like objects
    with open('house.ndjson') as f:
        drawings = ndjson.load(f)
    return [
        transform_drawing(drawing) for drawing in drawings[0:drawing_count]
        if len(drawing['drawing']) > 5
    ]
def concat_texts_timebins(lemma_path, metadata_path, outdir, timebin='10Min'):
    '''
    Concatenate per-document features into time bins of a given size and serialize the result.
    '''
    # load files
    with open(lemma_path) as fin:
        file_lemma = ndjson.load(fin)

    with open(metadata_path) as fin:
        file_meta = ndjson.load(fin)
    timestamps = [doc['start'] for doc in file_meta]
    del file_meta

    # resample
    df_resampled = (pd.DataFrame(
        file_lemma,
        index=pd.to_timedelta(timestamps)).resample(timebin).sum())

    # get rid of 0 (no document in time bin)
    df_resampled = (df_resampled.replace(0, np.nan).dropna())

    # get rid of [] (there is a document but no features in time bin)
    df_resampled['text'] = df_resampled.text[df_resampled.text.apply(len) > 0]
    df_resampled = (df_resampled.dropna().reset_index())

    # get timestamp as str
    df_resampled['time'] = df_resampled['index'].astype(str).str.extract(
        r'days (.*?)\.')

    # serialize
    file_res = []
    for i, row in df_resampled.iterrows():
        res = dict()
        res.update({
            'time': row.time,
            'text': row.text,
            'lemma': row.lemma,
            'pos': row.pos,
            'dep': row.dep,
            'ner': row.ner
        })
        file_res.append(res)

    outfname = os.path.basename(lemma_path)
    with open(os.path.join(outdir, outfname), 'w') as fout:
        ndjson.dump(file_res, fout)

    return None
def load_dataset(f, label_vocab):
    with open(f) as i:
        d = ndjson.load(i)
    table = [(PROMPT + " " + i["text"], label_vocab[i["label"]]) for i in d]
    df = pd.DataFrame(table)
    df.columns = ["sentence", "label"]
    return df
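# Hedged usage sketch tying load_dataset to get_label_vocab above; the file name and
# the value of the PROMPT constant are assumptions, not taken from the original project.
PROMPT = "classify:"  # assumed module-level constant used by load_dataset
label_vocab = get_label_vocab("train.ndjson")
train_df = load_dataset("train.ndjson", label_vocab)
print(train_df.head())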
def process_kibana_object(self, obj_type, indexpattern=None):
    """ Create json from ndjson kibana object to ease diff during commits """
    print("# Processing kibana object: %s" % obj_type)
    if obj_type != "index-pattern":
        src_file_name = "%s%s" % (EXPORT_FILES_PREFIX_KIBANA, obj_type)
    else:
        if indexpattern is None:
            for i in INDEX_PATTERNS_FILTER.split("|"):
                self.process_kibana_object(obj_type, indexpattern=i)
            return
        else:
            src_file_name = "%s%s_%s" % (
                EXPORT_FILES_PREFIX_KIBANA,
                obj_type,
                indexpattern,
            )

    src_file = os.path.join(self.export_path, "%s.ndjson" % src_file_name)
    diff_file = os.path.join(self.export_path, DIFF_PATH, "%s.json" % src_file_name)

    print("\tOpening %s: %s" % (obj_type, src_file))
    with open(src_file, "r") as src_ndjson_file:
        src_ndjson = ndjson.load(src_ndjson_file)

    for src_ndjson_line in src_ndjson:
        if obj_type == "index-pattern":
            src_ndjson_line["attributes"]["fields"] = sorted(
                json.loads(src_ndjson_line["attributes"]["fields"]),
                key=lambda x: x["name"],
            )
        elif obj_type == "search":
            src_ndjson_line["attributes"]["kibanaSavedObjectMeta"][
                "searchSourceJSON"] = json.loads(
                    src_ndjson_line["attributes"]["kibanaSavedObjectMeta"]
                    ["searchSourceJSON"])
        elif obj_type == "visualization":
            src_ndjson_line["attributes"]["kibanaSavedObjectMeta"][
                "searchSourceJSON"] = json.loads(
                    src_ndjson_line["attributes"]["kibanaSavedObjectMeta"]
                    ["searchSourceJSON"])
            src_ndjson_line["attributes"]["visState"] = json.loads(
                src_ndjson_line["attributes"]["visState"])
        elif obj_type == "dashboard":
            src_ndjson_line["attributes"]["kibanaSavedObjectMeta"][
                "searchSourceJSON"] = json.loads(
                    src_ndjson_line["attributes"]["kibanaSavedObjectMeta"]
                    ["searchSourceJSON"])
            src_ndjson_line["attributes"]["optionsJSON"] = json.loads(
                src_ndjson_line["attributes"]["optionsJSON"])
            src_ndjson_line["attributes"]["panelsJSON"] = json.loads(
                src_ndjson_line["attributes"]["panelsJSON"])

    print("\tWriting output to: %s" % diff_file)
    with open(diff_file, "w") as dst_json_file:
        json.dump(src_ndjson, dst_json_file, indent=4, sort_keys=True)
def test_csv_to_compressed_ndjson(self):
    self.df_handler.convert_file(self.input_folder / 'invoice.csv',
                                 self.out_file_path,
                                 'csv',
                                 'ndjson',
                                 convert_options={'compression': 'gzip'})
    self.assertCountEqual(
        read_ndjson_gz_file(self.out_file_path),
        ndjson.load((self.input_folder / 'invoice.ndjson').open()))
def load_data(ndjson_path):
    '''
    Read a preprocessed file & convert to ttx format.
    '''
    with open(ndjson_path, 'r') as f:
        obj = ndjson.load(f)
        obj_dfs = [pd.DataFrame(dat) for dat in obj]
    return obj_dfs
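# Hedged usage sketch (the path is hypothetical): each ndjson line is expected to hold
# a dict of equal-length lists, so load_data yields one DataFrame per document.
doc_frames = load_data('mdl/preprocessed.ndjson')
print('%d documents loaded' % len(doc_frames))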
def get_reviews(path):
    try:
        with open(path) as f:
            data_set = ndjson.load(f)
    except IOError:
        print('An error occurred trying to read the file.')

    all_reviews = []
    for data in data_set:
        all_reviews.append(data['text'])
    return all_reviews
def parse_dataset(path=RAW_DIR_NAME, decode=None, early_return=True):
    """
    Restructures dataset from '.ndjson' files into folders. Each folder will be of the
    form 'dataset/{LABEL}' and will contain 1 file per training example.
    Also saves the list of all filenames to 'filenames.txt'.

    @param path - str: path to directory containing dataset
    @param decode - None or "jpg" - how to decode training examples
    @param early_return - bool: indicates whether method should return early if
        'filenames.txt' already exists

    @returns list containing all the filenames of the training examples (relative to path)
    @returns list containing all the labels of the dataset
    """
    list_ids = []
    labels = set()

    # If the filenames.txt file already exists, parse the file to find
    # list_ids and labels, and return early
    if decode == 'jpg':
        list_ids_filename = os.path.join(path, '../img/' + 'filenames.txt')
    else:
        list_ids_filename = os.path.join(path, 'filenames.txt')
    if early_return and os.path.exists(list_ids_filename):
        with open(list_ids_filename) as f:
            list_ids = ndjson.load(f)
        for list_id in list_ids:
            label = os.path.basename(os.path.dirname(list_id))
            labels.add(label)
        return list_ids, list(labels)

    # Loop through all '.ndjson' files and split into individual files
    pool = mp.Pool(mp.cpu_count())
    files = os.listdir(path)
    files = [f for f in files if os.path.splitext(f)[1] == '.ndjson']
    list_ids_temp = []
    parse = functools.partial(parse_label, path=path, decode=decode)
    pool.map_async(parse, files, callback=list_ids_temp.extend)
    pool.close()
    pool.join()

    # Convert list_ids_temp from list of lists to just a list
    list_ids = []
    for list_id in list_ids_temp:
        list_ids += list_id

    # Write output to 'dataset/filename.txt' and find all labels
    with open(list_ids_filename, 'w') as f:
        ndjson.dump(list_ids, f)
    for list_id in list_ids:
        label = os.path.basename(os.path.dirname(list_id))
        labels.add(label)
    return list_ids, list(labels)
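# Hedged usage sketch: RAW_DIR_NAME is assumed to point at a folder of '.ndjson'
# sketch files (as in parse_label above); the call rebuilds the per-label folder
# structure and returns the example file names together with the label set.
list_ids, labels = parse_dataset(path=RAW_DIR_NAME, decode=None, early_return=True)
print('%d examples across %d labels' % (len(list_ids), len(labels)))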
def create_image_records(self):
    """Loop through the image files and write to tfrecords"""
    data_left = True
    passes_through_file = 5
    amount = 0
    while data_left:
        amount += 1
        writer = tf.python_io.TFRecordWriter(self.output_file + '/record_' +
                                             str(passes_through_file))
        for x, filename in enumerate(tqdm(self.C.file_list)):  # For all the file names
            x += 1
            if x > 3:
                break
            with open(filename) as f:
                data = ndjson.load(f)
            start_index = passes_through_file * self.images_per_class_per_tf_record
            end_index = start_index + self.images_per_class_per_tf_record
            if end_index > len(data):
                end_index = len(data)
            for j in range(start_index, end_index):  # Go through the strokes and construct an image from them
                line = data[j]
                if j < (passes_through_file * self.images_per_class_per_tf_record) + self.images_per_class_per_tf_record:
                    img = self.draw_it(line).reshape(1, 255, 255)
                    img_raw = img.tostring()
                    feature = {
                        'class': self._int64_feature(
                            self.C.class_list.index(line['word'])),
                        'key': self._bytes_feature(str.encode(line['key_id'])),
                        'image_raw': self._bytes_feature(img_raw)
                    }
                    example = tf.train.Example(
                        features=tf.train.Features(feature=feature))
                    writer.write(example.SerializeToString())
                else:
                    break
        passes_through_file += 1
        print("closing writer")
        writer.close()
        break  # only do one file for now
def import_normalize(doctop_path, train_data_path, meta_data_path, datetime_col='time'):
    '''
    Import & normalize a document-topic matrix.
    So far, only the averaging method is implemented!
    '''
    # DOCTOP
    with open(doctop_path) as f:
        doctop = ndjson.load(f)

    # normalize
    norm_all = [[value / sum(doc) for value in doc] for doc in doctop]

    # to df
    norm_df = pd.DataFrame(norm_all)

    # test length
    assert norm_df.values.tolist() == norm_all

    # TRAIN DATA
    with open(train_data_path) as f:
        input_texts = ndjson.load(f)

    # IDs of documents used for training
    ids = [doc['id'] for doc in input_texts]

    # META DATA (dates)
    meta = pd.read_csv(meta_data_path, parse_dates=[datetime_col])
    # keep only docs used for training
    meta_trained = meta.iloc[ids, :]

    # AGGREGATE
    days = meta_trained[datetime_col].dt.floor('d')  # parse_timebin_size
    norm_df.index = days.index
    norm_df['days'] = days
    topic_col_names = [col for col in norm_df.columns if col != 'days']

    # average
    avg_topic_df = norm_df.groupby('days')[topic_col_names].mean()
    doctop_avg = avg_topic_df.values.tolist()

    # normalize again
    doctop_avg = [[value / sum(doc) for value in doc] for doc in doctop_avg]

    return doctop_avg
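# Hedged usage sketch (all paths are hypothetical): import_normalize returns a
# day-by-day, row-normalized document-topic matrix averaged over the documents
# that fall on each day.
doctop_avg = import_normalize('mdl/doctop.ndjson',
                              'data/train_texts.ndjson',
                              'data/metadata.csv',
                              datetime_col='time')
print('%d days, %d topics' % (len(doctop_avg), len(doctop_avg[0])))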
def test_encounter_process(self):
    encounter = FHIREncounterResourceManager()
    with open('test/encounter.ndjson') as f:
        encounter_data = ndjson.load(f)
    data = encounter.run_encounter_process(encounter_data[0])
    self.assertNotEqual(data['source_id'], None)
    self.assertNotEqual(data['patient'], None)
    self.assertNotEqual(data['start_date'], None)
    self.assertNotEqual(data['end_date'], None)