def process_score_doc(kword, doc):
    f = io.open(doc, 'r', errors='ignore')
    s = f.read()
    candidate_sents = []
    orig_sentences = sent_tokenize(s)
    sentences = [word_tokenize(sent) for sent in orig_sentences]
    sentences = [pos_tag(sent) for sent in sentences]
    sent_index = 0
    for sent in sentences:
        NER_sent = ne_chunk(sent)
        iob_tags = tree2conlltags(NER_sent)
        for i in iob_tags:
            if i[2] in question_tag:
                # calculate combined TF-IDF
                combined_tf_idf = 0
                for k in question_keywords:
                    tf_s = tf_sent(k, sent)
                    idf_s = math.log(float(len(sentences) - tf_s + 0.5) / float(tf_s + 0.5))
                    tf_idf_s = tf_s * idf_s
                    combined_tf_idf += tf_idf_s
                candidate_sents.append((orig_sentences[sent_index], combined_tf_idf))
            else:
                pass
        sent_index += 1
    return list(set(candidate_sents))
def entities2token(tokenized_sentence, name_token=None, gpe_token=None):
    # Check whether we do any NE replacement. Avoids building the tree in some cases
    if name_token is not None or gpe_token is not None:
        tagged = nltk.pos_tag(tokenized_sentence)
        # Tag the named entities
        ne_tagged = nltk.tree2conlltags(nltk.ne_chunk(tagged))
        # Replace names
        if name_token is not None:
            ne_tagged = [(name_token, tag, ne_tag) if ne_tag.endswith('PERSON')
                         else (token, tag, ne_tag)
                         for (token, tag, ne_tag) in ne_tagged]
        # Replace geopolitical entities
        if gpe_token is not None:
            ne_tagged = [(gpe_token, tag, ne_tag) if ne_tag.endswith('GPE')
                         else (token, tag, ne_tag)
                         for (token, tag, ne_tag) in ne_tagged]
        # Discard the NE tags, keeping only (token, pos) pairs
        tagged = [(token, tag) for (token, tag, ne_tag) in ne_tagged]
        # Recollect the tokens
        tokens = [token for (token, tag) in tagged]
    else:
        tokens = tokenized_sentence
    # Convert them to lowercase
    tokens = [token.lower() for token in tokens]
    return tokens
def Ext_Chunks(sents):
    NP_li = []
    # print(sents)
    grammar_exp = r"""
    CHUNK: {<NN><NN.*><NN.*>+}    # chunk sequences of three or more nouns
           }<NNP>+{               # chink sequences of proper nouns
    """
    # cp = nltk.RegexpParser('CHUNK: {<NN><NN.*><NN.*>+}}<NNP>{')
    cp = nltk.RegexpParser(grammar_exp)
    # cp = nltk.RegexpParser('CHUNK: {<DT>?<JJ.*>*<NN.*>+}')
    for sent in sents:
        tree = cp.parse(sent)
        # print(tree.draw())
        for subtree in tree.subtrees():
            if subtree.label() == 'CHUNK':
                print(subtree)
                iob_tags = tree2conlltags(subtree)
                iob_tree = conlltags2tree(iob_tags)
                print(iob_tags)
                print(iob_tree)
                chunk_words = str(subtree).replace('/DT', '').replace('/JJS', '').replace('/JJ', '') \
                    .replace('/NNS', '').replace('/NNP', '').replace('(CHUNK', '') \
                    .replace(')', '').replace('/NN', '').replace('\n', '')
                NP_li.append(chunk_words)
                print(chunk_words, '\n')
    print('----------------------------------------------------------------\n', NP_li)
    return NP_li
def google_search(question):
    first_page = google.search(question, 1)
    # print first_page
    top_three_result = []
    i = 0
    while i < 5:
        top_three_result.append(first_page[i].description)
        i += 1
    first_search = ''.join(top_three_result).encode('ascii', 'replace')
    # print first_search
    ne_tree = ne_chunk(pos_tag(word_tokenize(first_search)))
    iob_tagged = tree2conlltags(ne_tree)
    ss = [tuple(map(str, eachTuple)) for eachTuple in iob_tagged]
    question_type = classify_question(question)
    print 'question_type: ', question_type
    if question_type == 'None':
        ans = "Oops! I don't know."
    else:
        google_answer = []
        if question_type == 'Person':
            for i in range(len(ss)):
                if ss[i][2] == 'B-PERSON' or ss[i][2] == 'I-PERSON':
                    google_answer.append(ss[i][0])
        elif question_type == 'Country':
            print 'country identified'
            for i in range(len(ss)):
                if ss[i][2] == 'B-GPE' or ss[i][2] == 'I-GPE':
                    google_answer.append(ss[i][0])
        elif question_type == 'Location':
            for i in range(len(ss)):
                if ss[i][2] == 'B-LOCATION' or ss[i][2] == 'I-LOCATION':
                    google_answer.append(ss[i][0])
        elif question_type == 'Date':
            for i in range(len(ss)):
                if ss[i][2] == 'B-DATE' or ss[i][2] == 'I-DATE':
                    google_answer.append(ss[i][0])
        print 'google: ', google_answer
        if not google_answer:
            ans = "Oops, I don't know! "
        else:
            print 'inside else'
            counts = collections.Counter(google_answer)
            print 'counts: ', counts
            t = counts.most_common(4)
            candidate_answer = [seq[0] for seq in t]
            print candidate_answer
            # new_list = sorted(google_answer, key=lambda x: -counts[x])
            # print 'new_list', new_list
            # ans = ' '.join(new_list)
            for i in range(len(candidate_answer)):
                candidate_answer[i] = 'Candidate Answer ' + str(i + 1) + ' ' + candidate_answer[i]
            candidate_answer = '\n'.join(candidate_answer)
            ans = candidate_answer
    return ans
def money_ner(words_tagged):
    grammar = 'NumPhrase: {<CD|NNS><CD|NNS|JJ>}'
    t_parser = nltk.RegexpParser(grammar)
    final_tree = t_parser.parse(words_tagged)
    final_tags = tree2conlltags(final_tree)
    return final_tags
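# Hedged usage sketch for money_ner above (the sample sentence is invented):
# the input must already be POS-tagged, and the output is a list of
# (token, pos, iob) triples where matched number phrases get B-/I-NumPhrase tags.
import nltk
from nltk import tree2conlltags

sample = nltk.pos_tag(nltk.word_tokenize("The ticket costs 25 dollars"))
print(money_ner(sample))
# exact grouping depends on the POS tags the tagger assigns, e.g.
# ('25', 'CD', 'B-NumPhrase'), ('dollars', 'NNS', 'I-NumPhrase')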
def to_dataset(cls, parsed_sentences, feature_detector):
    X, y = [], []
    for parsed in parsed_sentences:
        iob_tagged = tree2conlltags(parsed)
        words, tags, iob_tags = zip(*iob_tagged)
        # Materialize the (word, pos) pairs so they can be indexed repeatedly
        # (a bare zip object would be exhausted after the first pass on Python 3)
        tagged = list(zip(words, tags))
        for index in range(len(iob_tagged)):
            X.append(feature_detector(tagged, index, history=iob_tags[:index]))
            y.append(iob_tags[index])
    return X, y
def chunking_on_sentence(sentence):
    # `sentence` is expected to be POS-tagged already: a list of (word, pos) pairs
    pattern = 'NP: {<DT>?<JJ>*<NN>}'
    parser = nltk.RegexpParser(pattern)
    parsed_sentence = parser.parse(sentence)
    bio_tagged_sentence = nltk.tree2conlltags(parsed_sentence)
    # ne_chunk expects (word, pos) pairs, not the (word, pos, chunk) triples
    # produced by tree2conlltags, so run it on the original tagged sentence
    tree = nltk.ne_chunk(sentence)
    return bio_tagged_sentence, tree
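# Hedged usage sketch for chunking_on_sentence above (sentence invented):
import nltk

tagged = nltk.pos_tag(nltk.word_tokenize("The quick fox saw Alice in Paris"))
bio_tags, ne_tree = chunking_on_sentence(tagged)
print(bio_tags)   # (word, pos, B-NP/I-NP/O) triples from the regexp chunker
print(ne_tree)    # nltk.Tree with PERSON/GPE subtrees from ne_chunk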
def recognize_ne(s):
    """
    Recognize named entities in the given sentence using the NLTK package.
    :param s: String sentence to tag.
    :return: IOB-tagged (word, pos, ne) triples from the NE recognition tree.
    """
    # ne_chunk needs POS-tagged tokens, so tokenize and tag the raw string first
    tagged = nltk.pos_tag(nltk.word_tokenize(s))
    ne_tree = nltk.ne_chunk(tagged, binary=False)
    iob_tags = nltk.tree2conlltags(ne_tree)
    return iob_tags
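# Quick check of recognize_ne above (example sentence invented; output depends
# on the NLTK NE chunker model that is installed):
print(recognize_ne("Barack Obama visited Berlin in 2013"))
# yields triples such as ('Barack', 'NNP', 'B-PERSON'), ('Berlin', 'NNP', 'B-GPE'), ...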
def getNamedEntities(self, text):
    ne_set = set()
    try:
        tree = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
        iob_tagged = nltk.tree2conlltags(tree)
        for obj in iob_tagged:
            if obj[1] == 'NNP' and len(obj[0]) > 3:
                ne_set.add(obj[0])
    except:
        print "error in NER"
    return ne_set
def process_language(phrase):
    processedPhrase = list(my_bigram_tokens(nltk.word_tokenize(phrase)))
    processedPhrase = list(nltk.pos_tag(processedPhrase))
    # lemmas = list([lemma_with_default(T, mytagfixes) for T in processedPhrase])
    chunks = clean_chunks(list(nltk.tree2conlltags(cp.parse(processedPhrase))))
    processedPhrase = remove_stops_puncs(processedPhrase, stops)
    # lemmas = list([lemma_with_default(T, mytagfixes) for T in processedPhrase])
    # synons = list([get_synset(lemm) for lemm in lemmas])
    # return {'keywords': processedPhrase, 'lemmas': lemmas, 'synsets': synons}
    return processedPhrase, chunks
def ner_tag_text(self, text):
    """
    NER tag text
    :param text: raw text to tag
    :return: CONLL IOB format text
    """
    pickle_file = open(self.config_util.TRAIN_MODEL_PICKLE, 'rb')
    chunker_pickle = pickle.load(pickle_file)
    pickle_file.close()
    return tree2conlltags(
        chunker_pickle.parse(pos_tag(word_tokenize(text))))
def calculateParameters(doc: str, scores: Dict[str, float], cands, pr: Dict[str, float] = None):
    params = []
    max_cand_score = max(scores.values())
    all_cands = cands
    for cand in all_cands:
        freq = doc.count(cand)
        # pagerank_score = pr[cand]
        if cand not in scores:
            cand_score = 0.
        else:
            cand_score = scores[cand] / max_cand_score
        cand_len = len(cand)
        cand_term_count = len(cand.split())
        first_match = doc.find(cand) / len(doc)
        last_match = doc.rfind(cand) / len(doc)
        ne_cand = get_true_case(cand)
        words = nltk.pos_tag(nltk.word_tokenize(ne_cand))
        ne = nltk.tree2conlltags(nltk.ne_chunk(words))
        ne = [
            ' '.join(word for word, pos, chunk in group).lower()
            for key, group in itertools.groupby(ne, lambda tpl: tpl[2] != 'O')
            if key
        ]
        ne_cnt = len(ne[0].split()) if ne else 0
        if first_match == last_match:
            spread = 0.
        else:
            spread = last_match - first_match
        params.append([
            cand_score, cand_len, cand_term_count, first_match,
            1 - last_match, ne_cnt
        ])  # , pagerank_score])  # , r[cand]])
    params = np.array(params)
    max_ = params.max(axis=0)
    params = np.divide(params, max_, out=np.zeros_like(params), where=max_ != 0)
    return dict(zip(all_cands, params))
def get_nltk_vectors(self, texts: List[str]):
    # https://gist.github.com/japerk/1909413
    from textblob import TextBlob
    sid = self.nltk_sid
    vsid = self.vader_sid
    pdict = self.pdict
    n_tokens_in = self.n_tokens_in
    rake = self.rake_nltk
    nltk_texts = [fasttext.tokenize(text) for text in texts]
    textblob_sentiments = [[sentiment.polarity, sentiment.subjectivity]
                           for sentiment in [TextBlob(text).sentiment for text in texts]]
    textblob_sentiments = torch.tensor(textblob_sentiments).unsqueeze(1).expand(len(texts), n_tokens_in, 2)
    textblob_sentiments = textblob_sentiments.to(get_device())
    mask = stack_and_pad_tensors(list(map(lambda x: torch.ones(len(x), dtype=int), nltk_texts)), n_tokens_in)
    mask = mask.to(get_device())
    mask = self.is_mask_em(mask)
    has_digit = stack_and_pad_tensors(
        list(map(lambda x: torch.tensor([has_digits(str(t)) for t in x]), nltk_texts)), n_tokens_in)
    has_digit = has_digit.to(get_device())
    has_digit = self.has_digit_em(has_digit)
    m = self.text_model
    nltk_emb = stack_and_pad_tensors([torch.tensor([m[t] for t in sent]) for sent in nltk_texts],
                                     n_tokens_in)  # if t in m else np.zeros(m.vector_size)
    nltk_emb = nltk_emb.to(get_device())
    sid_vec = torch.tensor([list(sid.polarity_scores(t).values()) for t in texts])
    sid_vec = sid_vec.unsqueeze(1).expand(len(texts), n_tokens_in, sid_vec.size(1))
    sid_vec = sid_vec.to(get_device())
    vsid_vec = torch.tensor([list(vsid.polarity_scores(t).values()) for t in texts])
    vsid_vec = vsid_vec.unsqueeze(1).expand(len(texts), n_tokens_in, vsid_vec.size(1))
    vsid_vec = vsid_vec.to(get_device())
    conlltags = [[ptags for ptags in nltk.tree2conlltags(ne_chunk(pos_tag(x)))] for x in nltk_texts]
    pos = stack_and_pad_tensors(
        list(map(lambda x: torch.tensor([pdict[tag.lower()] for token, tag, ne in x]), conlltags)), n_tokens_in)
    pos = pos.to(get_device())
    pos_emb = self.tag_em(pos)
    ner = stack_and_pad_tensors(
        list(map(lambda x: torch.tensor([pdict[ne.lower().split("-")[-1]] for token, tag, ne in x]), conlltags)),
        n_tokens_in)
    ner = ner.to(get_device())
    ner_emb = self.tag_em(ner)
    phrases = [get_rake_nltk_phrases(rake, t) for t in texts]
    key_wc_rake_nltk = [get_rake_nltk_wc(tokens, phr) for tokens, phr in zip(nltk_texts, phrases)]
    key_wc_rake_nltk = stack_and_pad_tensors(key_wc_rake_nltk, self.n_tokens_in)
    key_wc_rake_nltk = key_wc_rake_nltk.to(get_device())
    nltk_rake_vectors = self.key_wc_rake_nltk(key_wc_rake_nltk)
    result = torch.cat([vsid_vec, nltk_emb, textblob_sentiments, pos_emb, ner_emb, nltk_rake_vectors,
                        sid_vec, mask, has_digit], 2)
    result = result.to(get_device())
    result = self.nltk_nn(result)
    return result
def __init__(self, chunked_sents, feature_detector, classifier_builder, **kwargs):
    # Transform the trees into IOB-annotated sentences [(word, pos, chunk), ...]
    chunked_sents = [tree2conlltags(sent) for sent in chunked_sents]
    chunked_sents = [triplets2tagged_pairs(sent) for sent in chunked_sents]
    self.feature_detector = feature_detector
    self.tagger = ClassifierBasedTaggerBatchTrained(
        train=(sent for sent in chunked_sents),
        feature_detector=self.feature_detector,
        classifier_builder=classifier_builder)
def get_chunktag(self, sentence):
    grammar = r"""
    NP: {<DT|JJ|P.*P.*|NN.*>+}
    PP: {<IN>+}
    VP: {<VB.*>+}
    ADVP: {<RB>+}
    """
    pos_sent = nltk.pos_tag(sentence)
    cp = nltk.RegexpParser(grammar)
    chunk_tree = cp.parse(pos_sent)
    chunk_tags = tree2conlltags(chunk_tree)
    chunk_tags = [ck[-1] for ck in chunk_tags]
    return chunk_tags
def evaluate(self, gold):
    # Convert nltk.Tree chunked sentences to (word, pos, iob) triplets
    chunked_sents = [tree2conlltags(sent) for sent in gold]
    # Convert (word, pos, iob) triplets to tagged tuples ((word, pos), iob)
    chunked_sents = [triplets2tagged_pairs(sent) for sent in chunked_sents]
    print(chunked_sents)
    dataset = self.tagger._todataset(chunked_sents)
    featuresets, tags = zip(*dataset)
    predicted_tags = self.tagger.classifier().classify_many(featuresets)
    return accuracy(tags, predicted_tags)
def checking_org(self, text):
    # First get the list of words, cleaned of stopwords.
    words = word_tokenize(text)
    words = [w for w in words
             if w.lower() not in stopwords.words('english')]
    # Tag the list.
    ptree = pos_tag(words)
    # Finally we flatten the NE tree and check whether any word represents an
    # organization. This check can definitely be improved.
    for w in tree2conlltags(ne_chunk(ptree)):
        if (w[2][2:] == 'ORGANIZATION') and (w[1] == 'NNP'):
            return True
    return False
def pos_tag_nltk(pos_tagger, sentence):
    tokens = word_tokenize(sentence)  # tokenization
    # pos_tagging | this gives us the (WORD, POS)
    pos_tags = pos_tagger.tag(tokens)
    # create the tree; the tree is necessary to do IOB tagging with tree2conlltags,
    # so we need to convert pos_tags to a tree with ne_chunk
    tree = ne_chunk(pos_tags)
    # IOB tagging | this gives us (WORD, POS, TAG) with tree2conlltags
    iob_tags = tree2conlltags(tree)
    return iob_tags
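# Hedged usage sketch for pos_tag_nltk above: any object with a .tag(tokens) method
# works as pos_tagger; here NLTK's default PerceptronTagger is assumed (requires the
# averaged_perceptron_tagger data). The sample sentence is invented.
from nltk.tag import PerceptronTagger

tagger = PerceptronTagger()
print(pos_tag_nltk(tagger, "Angela Merkel met Emmanuel Macron in Brussels"))
# (word, pos, iob) triples, e.g. ('Angela', 'NNP', 'B-PERSON'), ('Brussels', 'NNP', 'B-GPE')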
def tree2brackets(tree):
    s, tag = '', ''
    for item in tree2conlltags(tree):
        if item[2][0] in {'B', 'O'} and tag:
            s += tag + '] '
            tag = ''
        if item[2][0] == 'B':
            tag = item[2].split('-')[1]
            s += '['
        s += item[0] + ' '
    if tag:
        s += tag + '] '
    return s.strip()
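# Hedged usage sketch for tree2brackets above (sentence invented): it flattens a
# chunk/NE tree into a bracketed string via its IOB triples.
import nltk

tree = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize("Steve Jobs founded Apple in California")))
print(tree2brackets(tree))
# something like: '[Steve Jobs PERSON] founded [Apple ORGANIZATION] in [California GPE]'
# (entity labels depend on the NE chunker model)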
def on_get(self, req, resp, id):
    print(id)
    arts = []
    arts_obj = ArticleModel.objects().all_fields().limit(10)
    for art in arts_obj:
        title = word_tokenize(art['title'])
        tagged = pos_tag(title)
        tree = ne_chunk(tagged)
        iob_tags = tree2conlltags(tree)
        print(str(art['_id']), tagged, iob_tags)
        arts.append({
            "_id": str(art["_id"]),
            "tag": tagged,
            "tree": iob_tags
        })
    resp.json = {"rslt": json.dumps(arts)}
def calculateParameters(all_cands, doc, scores):
    params = []
    max_cand_score = max(scores.values())
    for cand in all_cands:
        freq = doc.count(cand)
        if cand not in scores:
            cand_score = 0.
        else:
            cand_score = scores[cand]  # / max_cand_score
        cand_len = len(cand)
        cand_term_count = len(cand.split())
        ne_cand = get_true_case(cand)
        words = nltk.pos_tag(nltk.word_tokenize(ne_cand))
        ne = nltk.tree2conlltags(nltk.ne_chunk(words))
        ne = [
            ' '.join(word for word, pos, chunk in group).lower()
            for key, group in itertools.groupby(ne, lambda tpl: tpl[2] != 'O')
            if key
        ]
        ne_cnt = len(ne[0].split()) if ne else 0
        first_match = doc.find(cand) / len(doc)
        last_match = doc.rfind(cand) / len(doc)
        # if cand_term_count == 1:
        #     cohesion = 0.
        # else:
        #     cohesion = cand_term_count * (1 + math.log(freq, 10)) * freq /
        if first_match == last_match:
            spread = 0.
        else:
            spread = last_match - first_match
        # print([cand_score, freq, cand_len, cand_term_count, first_match, last_match, spread, ne_cnt])
        params.append([
            cand_score, cand_len, cand_term_count, first_match, last_match,
            spread, ne_cnt
        ])  # cand_score,
    return params
def tweet_ner_tagger(text_list, st, cp):
    text = ["URL" if word[0].startswith("http") else word[0] for word in text_list]
    gold_tag = [word[1] for word in text_list]
    tokenized_text = text
    ner_taggers = st.tag(tokenized_text)
    pos_taggers = nltk.pos_tag(tokenized_text)
    chunk_taggers = tree2conlltags(cp.parse(pos_taggers))
    ner_sequence = [item[1] for item in ner_taggers]
    pos_sequence = [item[1] for item in pos_taggers]
    # chunk_taggers holds (word, pos, chunk) triples, so the chunk tag is item[2]
    chunking_sequence = [item[2] for item in chunk_taggers]
    return text, gold_tag, ner_sequence, pos_sequence, chunking_sequence
def get_iob(rl, name, book_analysis=False):
    tokens = list(
        filter(lambda token: token not in string.punctuation, word_tokenize(rl)))
    tagged_tokens = pos_tag(tokens)
    ner_tree = ne_chunk(tagged_tokens)
    iob_tagged = tree2conlltags(ner_tree)
    persons = list(filter(lambda x: "PERSON" in x[2], iob_tagged))
    tokens = list(map(lambda token: str(token).lower(), tokens))
    lemmatizer = nltk.stem.WordNetLemmatizer()
    lemmas = [lemmatizer.lemmatize(token) for token in tokens]
    stop = stopwords.words("english")
    no_stopwords = [item for item in lemmas if item not in stop]
    if book_analysis:
        print(f"{name} length: {len(rl)}")
        print(f"{name} persons: {len(persons)}")
        print(f"{name} tokens: {len(tokens)}")
    return persons, no_stopwords
def understand(self, sentence):
    # Break paragraph into sentences
    tokenized_sentence = sent_tokenize(sentence)
    # Break sentence into words
    for sent in tokenized_sentence:
        tokenized_word = word_tokenize(sent)
        # Tag corpora with universal POS tagset
        # For tag list, read https://www.nltk.org/book/ch05.html#tab-universal-tagset
        pos_tags = nltk.pos_tag(tokenized_word, tagset='universal')
        # Divide sentence into noun phrases with regular expression
        grammar = 'NOUN: {<DET>?<ADJ>*<NOUN>}'
        cp = nltk.RegexpParser(grammar)
        # Find chunk structure
        cs = cp.parse(pos_tags)
        # B-{tag} beginning, I-{tag} inside, O-{tag} outside
        iob_tags = np.asarray(tree2conlltags(cs)).tolist()
        # Recognize named entities
        doc = self.nlp(sent)
        # Parse word into numeral, ordinal, and time
        parse = lambda ne: dict([[_['dim'], _['value']['value']]
                                 for _ in self.duckling.parse(
                                     ne, dim_filter=conf.get_property('duckling')['dimensions'])])
        # [Word, character positions and entity type]. For all entity types, read
        # https://spacy.io/api/annotation#named-entities
        ne = list([ent.text, ent.start_char, ent.end_char, ent.label_, parse(ent.text)]
                  for ent in doc.ents)
        ne_tags = [_.ent_type_ for _ in doc]
        # Merge iob tags and named entity tags
        tagged_sent = [list(np.append(iob_tags[i], ne_tags[i]))
                       for i in range(len(iob_tags))]
        tagged_sent = ''.join(str(x) for x in tagged_sent)
        self.decide(tagged_sent, ne)
def to_dataset(self, parsed_sentences):
    """
    Transform a list of tagged sentences into a scikit-learn compatible POS dataset
    """
    X, y = [], []
    for parsed in parsed_sentences:
        iob_tagged = tree2conlltags(parsed)
        words, tags, iob_tags = zip(*iob_tagged)
        tagged = list(zip(words, tags))
        for index in range(len(iob_tagged)):
            X.append(
                self._feature_detector(tagged, index, history=iob_tags[:index]))
            y.append(iob_tags[index])
    return X, y
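# Hedged sketch of the per-sentence transformation done by to_dataset above, using a
# chunked sentence from the NLTK conll2000 corpus (requires nltk.download('conll2000'));
# the feature-detector call itself is class-specific and therefore omitted.
from nltk.chunk import tree2conlltags
from nltk.corpus import conll2000

sent_tree = conll2000.chunked_sents('train.txt')[0]
iob_tagged = tree2conlltags(sent_tree)   # [(word, pos, iob), ...]
words, tags, iob_tags = zip(*iob_tagged)
tagged = list(zip(words, tags))          # features are built from these (word, pos) pairs
print(tagged[:3], iob_tags[:3])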
def qa_generator(inputStr):
    original_statement = inputStr
    tokens = nltk.word_tokenize(inputStr)
    tagged = nltk.pos_tag(tokens)
    copy = tagged
    # tree2conlltags gives a list of tuples; each tuple has the word, POS, and entity in that order
    entities = nltk.tree2conlltags(nltk.ne_chunk(tagged))
    print('ENTITIES BELOW')
    print(entities)
    # separate the tuples into lists of words and their entity tags
    words, tags, ent = zip(*entities)
    words = list(words)
    ent = list(ent)
    questions = list()
    i = 0
    while i <= len(entities) - 1:
        # get words whose entity tag is B-PERSON or I-PERSON
        # Create a question that has both the first and last name? May not be super critical
        '''TAGS
        B-egin - first token of a multi-token entity
        I-n - inner token of a multi-token entity
        L-ast - final token of a multi-token entity
        U-nit - a single-token entity
        O-ut - a non-entity token
        '''
        if ent[i] == 'B-PERSON':
            questions.append("Who is " + words[i] + "?")
        elif ent[i] == 'I-PERSON':
            questions.append("Who is " + words[i] + "?")
        elif ent[i] == 'B-ORGANIZATION':
            questions.append("What is " + words[i] + "?")
            questions.append("Where is " + words[i] + "?")
            questions.append("What does " + words[i] + " do?")
        i = i + 1
    for i in questions:
        print(i)
def ner_analyse(text, chunker):
    """
    Extract human activity information from text (text filtered to sentences with a time label).
    :param text: doc
    :return: list of tuples (activity elements)
    """
    sents = nltk.sent_tokenize(text)
    result = []
    for sent in sents:
        if not re.match(r'(.*\d\d\d\d.*)|(.*\d\ds*)', sent):
            continue
        entities = chunker.parse(pos_tag(word_tokenize(sent)))
        entities = nltk.tree2conlltags(entities)
        has_per = False
        has_loc = False
        has_org = False
        has_tim = False
        print('Analysing following sentence:\n{0}'.format(sent.encode('utf-8')))
        for entity in entities:
            # print('Entity[2] is \n{0}'.format(entity[2]))
            if entity[2] == 'B-per':
                has_per = True
            elif entity[2] == 'B-tim':
                has_tim = True
            elif entity[2] == 'B-loc':
                has_loc = True
            elif entity[2] == 'B-org':
                has_org = True
        if has_per and has_tim and (has_loc or has_org):
            # nltk.conlltags2tree(entities).draw()
            print('Yes! This sentence has per tim and org|loc\n'
                  'Its entities are like:\n {0}'.format(entities))
            result.append(entities)
        else:
            print('No! This sentence does not meet our standard\n'
                  'Its entities are like:\n{0}'.format(entities))
    return result
def tree_forming(self):
    query = self.__dict__['query']
    q_tags = nltk.pos_tag(nltk.word_tokenize(query))
    par = nltk.RegexpParser('CHUNK: {<JJ>*<NN | NNS>*}')
    chunk = par.parse(q_tags)
    tree_q = nltk.tree2conlltags(chunk)
    langlist = []
    print(tree_q)
    string = ""  # initialise so the first CHUNK token cannot hit an unbound name
    for tup in tree_q:
        if tup[1] == 'VB':
            string = tup[0]
        elif tup[2] == "B-CHUNK" or tup[2] == "I-CHUNK":
            string += tup[0]
        else:
            continue
        langlist.append(string)
        string = ""
    print(langlist)
def test_regex(frases, testmode):
    # Split into sentences.
    frases = nltk.sent_tokenize(frases)
    # Tokenize.
    tokens = [nltk.word_tokenize(frase) for frase in frases]
    # Apply the hidden Markov model tagger.
    tagged = [hmm_tagger.tag(token) for token in tokens]
    # COMIDA (food): detects simple food nouns, nouns followed by an adjective ("pollo asado"),
    # and food such as "pincho de tortilla" or "pollo con tomate".
    # CANTIDAD (quantity): detects letters and numbers.
    cp = nltk.RegexpParser('''
    COMIDA: {(<ncms000>|<ncmp000>|<ncfs000>|<Fpt>)+(<aq0ms0|aq0fs0>)*<sps00>+(<ncms000>|<ncmp000>|<ncfs000>|<da0fs0>|<Fpt>)+}
    COMIDA: {(<ncms000>|<ncmp000>|<ncfs000>|<Fpt>)+(<aq0ms0|aq0fs0>)*}
    CANTIDAD: {(<di0ms0>|<dn0cp0>|<pi0ms000>|<di0fs0>|<Z>)+}
    ''')
    # Apply the RegexpParser to our tagged tokens.
    for s in tagged:
        result = cp.parse(s)
        # result.draw()
        if testmode == True:
            diccionario = diccionario_regex(result)
            print(diccionario)
        iob_tags = tree2conlltags(result)
    return iob_tags
path=r"/home/arushi/toi_news_articles" len_art=[] city_name=[] for filename in os.listdir(path): print(filename) toi2=open(r"/home/arushi/toi_news_articles/"+filename,"r") data=toi2.read().replace('\n', '') #len_art.append(len(data.split())) words=word_tokenize(data) #print(nltk.pos_tag(words)) tree=entities(data) iob_tags = tree2conlltags(tree) #print(iob_tags) for tup in iob_tags: if(tup[2]=="B-GPE" or tup[2]=="O_GPE" or tup[2]=="I-GPE"): city_name.append(tup[0]) #print(tree) #tree.draw() print(city_name) import pandas as pd df = pd.DataFrame(city_name, columns=["colummn"])