df_medspacy_annotations = pd.DataFrame(columns=[ 'Tweet_id', 'Text_section', 'Span_start', 'Span_end', 'Annotation_type', 'Extras' ]) df_medspacy_tweets_tagged = pd.DataFrame( columns=['Tweet_id', 'Tweet_full_text']) #------------We setup the tagger using Quick UMLS------------------- print("Configuring the Medspacy tagger. Please wait...") nlp = {} #Configuring the Medspacy Tagger nlp['default'] = en_info_3700_i2b2_2012.load() sectionizer = Sectionizer(nlp['default']) nlp['default'].add_pipe(sectionizer) for index, row in df_filtered.iterrows(): annotation_default = nlp['default'](str(row['tweet_text'])) #Default model if len(annotation_default.ents) > 0: df_medspacy_tweets_tagged.loc[len(df_medspacy_tweets_tagged.index)] = [ row['tweet_id'], row['tweet_text'] ] for ent in annotation_default.ents: df_medspacy_annotations.loc[len(df_medspacy_annotations)] = [ row['tweet_id'], ent, ent.start_char, ent.end_char, ent.label_,