Beispiel #1
0
	def check_if_np_contains_number(self, np):
		"""Tell whether the noun phrase begins with a number word.

		The leading token is taken once by splitting on spaces and, when the
		phrase contains a hyphen, once more by splitting on '-'.

		Returns 1 if math_modifiers.is_word_number accepts either leading
		token, otherwise 0.
		"""
		leading_tokens = [np.split(' ')[0]]
		if '-' in np:
			leading_tokens.append(np.split('-')[0])
		for token in leading_tokens:
			if math_modifiers.is_word_number(token) == True:
				return 1
		return 0
Beispiel #2
0
    def write_features(self):
        """Write a label plus log-linear features for each noun-phrase pair.

        For every index in [data_start_index, data_end_index) that is not in
        file_path_refrence.problematic_indexes, the question is loaded with
        read_data and every ordered pair of distinct noun phrases from
        question_prop.question_strings_np is considered. Pairs where one
        phrase is the other preceded by a number word (e.g. "shoes" and
        "100 shoes") are skipped.

        Each emitted line is the text returned by
        class_finder.check_for_label followed by ' <index>:<value>' for every
        feature, terminated by a newline. Output goes to feature_file_path.
        """
        # The context manager guarantees the file is closed even on error.
        with open(self.feature_file_path, 'w') as output_file:
            self.question_prop = entity_properties()
            self.question_prop.find_word_list(
                self.file_path_refrence.synonym_list_path,
                self.file_path_refrence.num_of_dimentions)

            for i in range(self.data_start_index, self.data_end_index):
                print("i is:")
                print(i)
                if i in self.file_path_refrence.problematic_indexes:
                    continue

                self.read_data(i)
                for np1 in self.question_prop.question_strings_np:
                    for np2 in self.question_prop.question_strings_np:
                        if np1 == np2:
                            continue
                        # Skip "<number> <noun>" vs "<noun>". The prefix is
                        # everything before the suffix minus the separator
                        # character; slicing from the end keeps this right
                        # even when the shorter phrase also occurs earlier.
                        if np1.endswith(np2):
                            if math_modifiers.is_word_number(
                                    np1[:-len(np2) - 1]):
                                continue
                        if np2.endswith(np1):
                            if math_modifiers.is_word_number(
                                    np2[:-len(np1) - 1]):
                                continue

                        # The label comes first on the line.
                        class_finder_mode = 'joint'
                        if self.binary_multiclass_mode == 0:
                            class_finder_mode = 'pair_relevant'
                        class_finder_obj = class_finder(class_finder_mode,
                                                        self.question_prop)
                        output_line = class_finder_obj.check_for_label(
                            np1, np2)
                        output_file.write(output_line)

                        features_list = []
                        loglinear_features = logLinear_Feature_finder(
                            self.question_prop)
                        # Features describe the (np1, np2) pair.
                        features_list = loglinear_features.appropriate_feature_finder_list(
                            features_list, np1, np2, output_line,
                            self.binary_multiclass_mode, self.test_train_mode)
                        # A separate name keeps the question index `i` intact.
                        for feature_index, value in enumerate(features_list):
                            output_file.write(' ' + str(feature_index) + ':' +
                                              str(value))

                        output_file.write('\n')
                self.question_prop.flush()
Beispiel #3
0
 def check_if_np_contains_number_anywhere(self, np):
     """Return 1 if any word of the noun phrase is a number word, else 0.

     The phrase is split on spaces. Each token is tested with
     math_modifiers.is_word_number; a hyphenated token is additionally
     tested piece by piece, so every call receives a single word rather
     than a list.
     """
     for part in np.split(' '):
         if math_modifiers.is_word_number(part) == True:
             return 1
         if '-' in part:
             for piece in part.split('-'):
                 if math_modifiers.is_word_number(piece) == True:
                     return 1
     return 0
Beispiel #4
0
	def check_for_math_modifier_or_number_inside_np(self, np):
		"""Flag noun phrases whose leading token is a known word or a number.

		The leading token is taken by splitting on spaces and, when the
		phrase contains a hyphen, again by splitting on '-'. A token counts
		when self.check_for_known_word returns '1' for it or when
		math_modifiers.is_word_number accepts it.

		Returns the string '1' on a hit and '0' otherwise.
		"""
		heads = [np.split(' ')[0]]
		if '-' in np:
			heads.append(np.split('-')[0])
		for head in heads:
			if self.check_for_known_word(head) == '1':
				return '1'
			if math_modifiers.is_word_number(head) == True:
				return '1'
		return '0'
Beispiel #5
0
	def check_if_np_contains_number_anywhere(self, np):
		"""Look for a number word after the first word of the noun phrase.

		The phrase is split on spaces and the first token is deliberately
		skipped, so a single-word phrase never matches. Each remaining token
		is tested with math_modifiers.is_word_number; a hyphenated token is
		also tested piece by piece so that every call receives one word.

		Returns (1, word) for the first matching word or hyphen piece, and
		(0, -1) when nothing matches.
		"""
		for part in np.split(' ')[1:]:
			if math_modifiers.is_word_number(part) == True:
				return (1, part)
			if '-' in part:
				for piece in part.split('-'):
					if math_modifiers.is_word_number(piece) == True:
						return (1, piece)
		return (0, -1)
	def write_features(self):
		"""Write a 'str_disjoint' label plus features for each noun-phrase pair.

		For every index in [data_start_index, data_end_index) that is not in
		file_path_refrence.problematic_indexes, the question is loaded with
		read_data and ordered pairs of distinct noun phrases from
		question_prop.question_strings_np are considered. Phrases that are in
		question_prop.pronoun_list or have six or more space-separated words
		are ignored, as are pairs where one phrase is the other preceded by a
		number word.

		Each emitted line is the text returned by
		class_finder.check_for_label followed by ' <index>:<value>' for every
		feature (basic features of np1, basic features of np2, useful-np
		features, disjoint features), terminated by a newline. Output goes to
		feature_file_path.
		"""
		# The context manager guarantees the file is closed even on error.
		with open(self.feature_file_path, 'w') as output_file:
			self.question_prop = entity_properties()
			self.question_prop.find_word_list(self.file_path_refrence.synonym_list_path, self.file_path_refrence.num_of_dimentions)

			for i in range(self.data_start_index, self.data_end_index):
				print("i is:")
				print(i)
				if i in self.file_path_refrence.problematic_indexes:
					continue

				self.read_data(i)
				for np1 in self.question_prop.question_strings_np:
					if np1 in self.question_prop.pronoun_list or len(np1.split(' ')) >= 6:
						continue
					for np2 in self.question_prop.question_strings_np:
						if np2 in self.question_prop.pronoun_list or len(np2.split(' ')) >= 6:
							continue
						if np1 == np2:
							continue
						# Skip "<number> <noun>" vs "<noun>". The prefix is
						# everything before the suffix minus the separator
						# character; slicing from the end keeps this right even
						# when the shorter phrase also occurs earlier.
						if np1.endswith(np2):
							if math_modifiers.is_word_number(np1[:-len(np2) - 1]):
								continue
						if np2.endswith(np1):
							if math_modifiers.is_word_number(np2[:-len(np1) - 1]):
								continue

						# The label comes first on the line.
						class_finder_obj = class_finder('str_disjoint', self.question_prop)
						output_line = class_finder_obj.check_for_label(np1, np2)
						output_file.write(output_line)

						feature_list = []
						basic_features = basic_faeture_finder(self.question_prop)
						feature_list = basic_features.appropriate_feature_finder_list(feature_list, np1)
						feature_list = basic_features.appropriate_feature_finder_list(feature_list, np2)
						useful_np_features = useful_np_feature_finder(self.question_prop)
						feature_list = useful_np_features.appropriate_feature_finder_list(feature_list, np1, np2)
						disjoint_features = disjoint_feature_finder(self.question_prop)
						feature_list = disjoint_features.appropriate_feature_finder_list(feature_list, np1, np2)
						# A separate name keeps the question index `i` intact.
						for feature_index, value in enumerate(feature_list):
							output_file.write(' ' + str(feature_index) + ':' + str(value))

						output_file.write('\n')

				self.question_prop.flush()
    def calc_merge_count_feature(self, feature_list, feature_name_list, chain1,
                                 chain2):
        """Append four count-based features comparing two noun-phrase chains.

        A phrase contributes to its chain's tally when it is in
        self.noun_phrase_with_counts. A phrase containing a space whose first
        word is a number word contributes once more when the remainder of the
        phrase is in self.noun_phrase_with_counts.

        Four values (1 or -1) are appended to feature_list and their
        descriptions to feature_name_list: both tallies > 0, either tally > 0,
        both tallies > 2, either tally > 2.

        Returns the tuple (feature_list, feature_name_list).
        """
        tallies = []
        for chain in (chain1, chain2):
            tally = 0
            for phrase in chain:
                if phrase in self.noun_phrase_with_counts:
                    tally += 1
                if ' ' in phrase:
                    first_word = phrase.split(' ')[0]
                    if math_modifiers.is_word_number(first_word) == True:
                        # NOTE(review): the remainder keeps its leading space
                        # (slice starts at the space) -- confirm the entries
                        # of noun_phrase_with_counts are stored that way.
                        remainder = phrase[phrase.index(' '):]
                        if remainder in self.noun_phrase_with_counts:
                            tally += 1
            tallies.append(tally)
        first_tally, second_tally = tallies

        checks = (
            (first_tally > 0 and second_tally > 0,
             "chain 1 has counts and chain 2 has counts"),
            (first_tally > 0 or second_tally > 0,
             "chain 1 has counts or chain 2 has counts"),
            (first_tally > 2 and second_tally > 2,
             "chain 1 and chain 2 has counts more than 2"),
            (first_tally > 2 or second_tally > 2,
             "chain 1 or chain 2 has counts more than 2"),
        )
        for holds, description in checks:
            feature_list.append(1 if holds else -1)
            feature_name_list.append(description)
        return (feature_list, feature_name_list)
Beispiel #8
0
	def write_features(self):
		"""Write an 'eq_noRel' label plus features for each relevant pair.

		For every index in [data_start_index, data_end_index) except the
		hard-coded indexes 17, 30, 33 and 70, the question is loaded with
		read_data and each entry of question_prop.relevant_pairs is split on
		a tab into np1 and np2. Pairs starting with any entry of
		self.non_relevant_string are skipped, as are pairs where one phrase
		is the other preceded by a number word.

		Each emitted line is the text returned by
		class_finder.check_for_label followed by ' <index>:<value>' for every
		feature (basic features of np1, basic features of np2, equivalence
		features), terminated by a newline. Output goes to feature_file_path.
		"""
		skipped_indexes = (17, 30, 33, 70)
		# The context manager guarantees the file is closed even on error.
		with open(self.feature_file_path, 'w') as output_file:
			self.question_prop = entity_properties()
			for i in range(self.data_start_index, self.data_end_index):
				print("i is:")
				print(i)
				if i in skipped_indexes:
					continue
				self.read_data(i)
				for pair in self.question_prop.relevant_pairs:
					# Skip the whole pair; a `continue` inside a nested loop
					# over the prefixes would only advance that inner loop.
					if any(pair.startswith(str_sample) for str_sample in self.non_relevant_string):
						continue
					parts = pair.split('\t')
					np1 = parts[0]
					np2 = parts[1]

					# Skip "<number> <noun>" vs "<noun>". The prefix is
					# everything before the suffix minus the separator
					# character. Identical phrases are never a number
					# instantiation of each other, hence the inequality guard.
					if np1 != np2 and np1.endswith(np2):
						if math_modifiers.is_word_number(np1[:-len(np2) - 1]):
							continue
					if np1 != np2 and np2.endswith(np1):
						if math_modifiers.is_word_number(np2[:-len(np1) - 1]):
							continue

					# The label comes first on the line.
					class_finder_obj = class_finder('eq_noRel', self.question_prop)
					output_line = class_finder_obj.check_for_label(np1, np2)
					output_file.write(output_line)

					feature_list = []
					basic_features = basic_faeture_finder(self.question_prop)
					feature_list = basic_features.appropriate_feature_finder_list(feature_list, np1)
					feature_list = basic_features.appropriate_feature_finder_list(feature_list, np2)
					eq_features = equivalence_feature_finder(self.question_prop)
					feature_list = eq_features.appropriate_feature_finder_list(feature_list, np1, np2)

					# A separate name keeps the question index `i` intact.
					for feature_index, value in enumerate(feature_list):
						output_file.write(' ' + str(feature_index) + ':' + str(value))

					output_file.write('\n')
				self.question_prop.flush()
Beispiel #9
0
	def find_number_after(self, whole_question, type_index):
		"""Scan forward from type_index for the next number word.

		Word boundaries come from self.find_next_space. The scan starts at
		the word following the first space found from type_index + 1 and
		tests each word with math_modifiers.is_word_number.

		Returns (number of words examined, start index of the number word,
		list of the words examined including the number word), or
		(-1, -1, []) when the scan ends without finding one.
		"""
		# presumably find_next_space returns len(whole_question) when no
		# further space exists; the loop relies on that value to stop.
		boundary = self.find_next_space(whole_question, type_index + 1)
		word_start = type_index
		words_seen = []
		found = False
		while found == False:
			word_start = boundary + 1
			boundary = self.find_next_space(whole_question, boundary + 1)
			word = whole_question[word_start:boundary]
			words_seen.append(word)
			found = math_modifiers.is_word_number(word)
			if boundary == len(whole_question):
				break
		if found == False:
			return (-1, -1, [])
		return (len(words_seen), word_start, words_seen)
Beispiel #10
0
	def find_number_before(self, whole_question, type_index):
		"""Scan backward from type_index for the closest preceding number word.

		Word boundaries come from self.find_prev_space. Each word between two
		boundaries is tested with math_modifiers.is_word_number; the scan
		stops at the first hit or when find_prev_space returns 0.

		Returns (word count, start index of the number word, list of the
		words examined including the number word), or (-1, -1, []) when the
		scan ends without finding one.
		"""
		boundary = self.find_prev_space(whole_question, type_index)
		steps = 0
		word_start = 0
		words_seen = []
		found = False
		while found == False:
			word_end = boundary
			boundary = self.find_prev_space(whole_question, boundary - 1)
			reached_front = boundary == 0
			if reached_front:
				# NOTE(review): a boundary of 0 is treated as the start of the
				# text and the word count is bumped twice for this word --
				# confirm the extra increment is intended.
				word_start = 0
				steps = steps + 2
			else:
				word_start = boundary + 1
				steps = steps + 1
			word = whole_question[word_start:word_end]
			words_seen.append(word)
			found = math_modifiers.is_word_number(word)
			if reached_front:
				break
		if found == False:
			return (-1, -1, [])
		return (steps, word_start, words_seen)
Beispiel #11
0
    def write_features(self):
        """Write pairs, 'joint' labels and features to the visualisation file.

        For every index in [data_start_index, data_end_index) that is not in
        self.file_path_refrence.problematic_indexes, the question is loaded
        with read_data and every ordered pair of distinct noun phrases from
        question_prop.question_strings_np is written as 'np1<TAB>np2'.
        Unless one phrase is the other preceded by a number word (e.g.
        "shoes" and "100 shoes"), a second line follows: the text returned by
        class_finder.check_for_label and ' <index>:<value>' for every feature
        from the basic, useful-np, disjoint, subset and equivalence finders.

        All output goes to file_refrence().pair_visualize_data_file.
        feature_file_path is only created/truncated, never written to.
        """
        # Keep the create/truncate side effect on the feature file, but do
        # not hold on to a handle that is never used.
        open(self.feature_file_path, 'w').close()
        self.question_prop = entity_properties()
        self.question_prop.find_word_list(
            self.file_path_refrence.synonym_list_path,
            self.file_path_refrence.num_of_dimentions)
        file_path_refrence = file_refrence()
        # The context manager guarantees the file is closed even on error.
        with open(file_path_refrence.pair_visualize_data_file,
                  'w') as output_file:
            for i in range(self.data_start_index, self.data_end_index):
                print("i is:")
                print(i)
                if i in self.file_path_refrence.problematic_indexes:
                    continue

                self.read_data(i)
                for np1 in self.question_prop.question_strings_np:
                    for np2 in self.question_prop.question_strings_np:
                        if np1 == np2:
                            continue
                        output_file.write(np1 + '\t' + np2 + '\n')

                        # Skip "<number> <noun>" vs "<noun>". The prefix is
                        # everything before the suffix minus the separator
                        # character; slicing from the end keeps this right
                        # even when the shorter phrase also occurs earlier.
                        if np1.endswith(np2):
                            if math_modifiers.is_word_number(
                                    np1[:-len(np2) - 1]):
                                continue
                        if np2.endswith(np1):
                            if math_modifiers.is_word_number(
                                    np2[:-len(np1) - 1]):
                                continue

                        # The label comes first on the feature line.
                        class_finder_obj = class_finder('joint',
                                                        self.question_prop)
                        output_line = class_finder_obj.check_for_label(
                            np1, np2)
                        output_file.write(output_line)

                        features_list = []

                        basic_features = basic_feature_finder_revised(
                            self.question_prop)
                        features_list = basic_features.appropriate_feature_finder_list(
                            features_list, np1, np2)
                        useful_np_features = useful_np_feature_finder_revised(
                            self.question_prop)
                        features_list = useful_np_features.appropriate_feature_finder_list(
                            features_list, np1, np2)
                        disjoint_features = disjoint_feature_finder_revised(
                            self.question_prop)
                        features_list = disjoint_features.appropriate_feature_finder_list(
                            features_list, np1, np2)
                        subset_features = subset_feature_finder_revised(
                            self.question_prop)
                        features_list = subset_features.appropriate_feature_finder_list(
                            features_list, np1, np2)
                        eq_features = equivalence_feature_finder_revised(
                            self.question_prop)
                        features_list = eq_features.appropriate_feature_finder_list(
                            features_list, np1, np2)
                        # A separate name keeps the question index `i` intact.
                        for feature_index, value in enumerate(features_list):
                            output_file.write(' ' + str(feature_index) + ':' +
                                              str(value))

                        output_file.write('\n')
                self.question_prop.flush()
Beispiel #12
0
                if not line_parts[useful_indexes[j]].startswith('-'):
                    count = count + 1
                    row_index.append(line_count)
                    column_index.append(line_parts[useful_indexes[j]])
                    seen_nps.append(line_parts[useful_indexes[j]])
                box_row.append(line_parts[useful_indexes[j]])
            if len(box_row) != 0:
                true_table.append(box_row)
        # print 'here'

        num_list = []
        word_list = []

        for np in np_list:
            if len(np.split(' ')) == 1 and '-' not in np:
                if math_modifiers.is_word_number(np) == True:
                    num_list.append(np)
                else:
                    if np not in word_list:
                        word_list.append(np)
            elif '-' in np:
                parts = np.split(' ')
                if math_modifiers.is_word_number(parts[0]):
                    num_list.append(parts[0])
                    rest_noun = ''
                    for jj in range(1, len(parts)):
                        rest_noun = rest_noun + ' ' + parts[jj]
                    rest_noun = rest_noun[1:]
                    if rest_noun not in word_list:
                        word_list.append(rest_noun)
                else: