def ig_eng(path_load, path_ig):
    """Rewrite each English text as a stream of kept words and POS tags.

    Every entry of ``path_load`` (except ``.DS_Store``) is treated as a grade
    directory.  Each file inside it is read as UTF-8, split into sentences and
    tokens with NLTK and POS-tagged.  A token that is contained in the
    collection returned by ``get_all_ig(path_load, grades)`` is kept verbatim;
    any other token is replaced by its POS tag.  The resulting items are
    written, each followed by a single space, to
    ``path_ig + grade + "/" + filename`` (UTF-8 encoded).

    Note: this file contains a second, later definition of ``ig_eng`` that
    shadows this one at import time.

    Args:
        path_load: Input root.  It is concatenated directly with the grade
            name, so it must end with a path separator.
        path_ig: Output root, concatenated the same way as ``path_load``.

    Raises:
        OSError / IOError: If a directory or file cannot be read or written.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    # Filter instead of list.remove(): remove() raises ValueError when the
    # macOS metadata entry does not exist in the directory.
    grades = [entry for entry in os.listdir(path_load)
              if entry != '.DS_Store']

    ig_words = get_all_ig(path_load, grades)

    for grade in grades:
        path_to_grade = path_load + grade + "/"
        for filename in os.listdir(path_to_grade):
            if filename == '.DS_Store':
                continue

            # Read raw bytes and decode once: byte-identical to the old
            # read()/decode() pair on Python 2 and also valid on Python 3,
            # where str has no decode() method.
            with open(path_to_grade + filename, "rb") as my_file:
                data = my_file.read().replace(b'\n', b'').decode('utf-8')

            text = []
            for sent in nltk.sent_tokenize(data):
                for chunk in nltk.pos_tag(nltk.word_tokenize(sent)):
                    if not isinstance(chunk, tuple):
                        continue
                    token, tag = chunk[0], chunk[1]
                    text.append(token if token in ig_words else tag)

            out_path = path_ig + grade + "/" + filename
            out_dir = os.path.dirname(out_path)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # Binary mode keeps the output as plain UTF-8 bytes on both
            # Python 2 and 3; the context manager closes the handle even if
            # encoding or writing fails.
            with open(out_path, 'wb') as ig_file:
                for word in text:
                    ig_file.write(word.encode('utf-8') + b" ")
def ig_eng(path_load, path_ig):
    """Rewrite each English text as a stream of kept words and POS tags.

    Every entry of ``path_load`` (except ``.DS_Store``) is treated as a grade
    directory.  Each file inside it is read as UTF-8, split into sentences and
    tokens with NLTK and POS-tagged.  A token that is contained in the
    collection returned by ``get_all_ig(path_load, grades)`` is kept verbatim;
    any other token is replaced by its POS tag.  The resulting items are
    written, each followed by a single space, to
    ``path_ig + grade + "/" + filename`` (UTF-8 encoded).

    Note: an earlier definition of ``ig_eng`` exists in this file; this later
    one is the definition that takes effect at import time.

    Args:
        path_load: Input root.  It is concatenated directly with the grade
            name, so it must end with a path separator.
        path_ig: Output root, concatenated the same way as ``path_load``.

    Raises:
        OSError / IOError: If a directory or file cannot be read or written.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    # Filter instead of list.remove(): remove() raises ValueError when the
    # macOS metadata entry does not exist in the directory.
    grades = [entry for entry in os.listdir(path_load)
              if entry != '.DS_Store']

    ig_words = get_all_ig(path_load, grades)

    for grade in grades:
        path_to_grade = path_load + grade + "/"
        for filename in os.listdir(path_to_grade):
            if filename == '.DS_Store':
                continue

            # Read raw bytes and decode once: byte-identical to the old
            # read()/decode() pair on Python 2 and also valid on Python 3,
            # where str has no decode() method.
            with open(path_to_grade + filename, "rb") as my_file:
                data = my_file.read().replace(b'\n', b'').decode('utf-8')

            text = []
            for sent in nltk.sent_tokenize(data):
                for chunk in nltk.pos_tag(nltk.word_tokenize(sent)):
                    if not isinstance(chunk, tuple):
                        continue
                    token, tag = chunk[0], chunk[1]
                    text.append(token if token in ig_words else tag)

            out_path = path_ig + grade + "/" + filename
            out_dir = os.path.dirname(out_path)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # Binary mode keeps the output as plain UTF-8 bytes on both
            # Python 2 and 3; the context manager closes the handle even if
            # encoding or writing fails.
            with open(out_path, 'wb') as ig_file:
                for word in text:
                    ig_file.write(word.encode('utf-8') + b" ")
def ig_rus(path_load, path_ig,
           path_to_api='/Users/Ivan/PycharmProject/ReadAbility/ApiData/rus/pos/'):
    """Rewrite each Russian text as a stream of kept words and tags.

    Every entry of ``path_load`` (except ``.DS_Store``) is treated as a grade
    directory.  For each ``<name>.txt`` file in it, the matching annotation
    file ``path_to_api + grade + "/" + <name>.xml`` is parsed.  For every
    ``I-annotation`` element, the ``start`` and ``end`` children give a slice
    of ``root[0].text`` and the first child of the ``value`` element gives
    the tag (presumably a part-of-speech tag, judging by the variable name).
    A sliced word that is contained in the collection returned by
    ``get_all_ig(path_load, grades)`` is kept verbatim; otherwise the tag is
    emitted.  The items are written, each followed by a single space, to
    ``path_ig + grade + "/" + filename`` (UTF-8 encoded).

    The output file is written once per input file, after all annotations
    have been collected.  An input without any ``I-annotation`` element
    therefore yields an empty output file.

    Note: this file contains a second, later definition of ``ig_rus`` that
    shadows this one at import time.

    Args:
        path_load: Input root.  It is concatenated directly with the grade
            name, so it must end with a path separator.
        path_ig: Output root, concatenated the same way as ``path_load``.
        path_to_api: Root of the XML annotation files, concatenated the same
            way.  Defaults to the previously hard-coded location.

    Raises:
        ValueError: If a filename does not contain ``'.txt'``.
        OSError / IOError: If a directory or file cannot be read or written.
    """
    # Filter instead of list.remove(): remove() raises ValueError when the
    # macOS metadata entry does not exist in the directory.
    grades = [entry for entry in os.listdir(path_load)
              if entry != '.DS_Store']
    ig_words = get_all_ig(path_load, grades)

    for grade in grades:
        path_to_grade = path_load + grade + "/"
        for filename in os.listdir(path_to_grade):
            if filename == '.DS_Store':
                continue

            xml_path = (path_to_api + grade + "/" +
                        filename[:filename.index('.txt')] + '.xml')
            # Bytes are handed to the XML parser so that it applies the
            # document's own encoding on both Python 2 and Python 3.
            with open(xml_path, "rb") as my_file:
                data = my_file.read().replace(b'\n', b'')

            text = []
            root = ElementTree.fromstring(data)

            for word in root.iter('I-annotation'):
                start_word = 0
                end_word = 0
                pos_tag = ''

                for child in word:
                    if child.tag == 'start':
                        start_word = int(child.text)
                    elif child.tag == 'end':
                        end_word = int(child.text)
                    elif child.tag == 'value':
                        pos_tag = child[0].text

                text_word = root[0].text[start_word:end_word]
                text.append(text_word if text_word in ig_words else pos_tag)

            # Writing happens once, after the annotation loop; rewriting the
            # whole file for every annotation was quadratic in their count.
            out_path = path_ig + grade + "/" + filename
            out_dir = os.path.dirname(out_path)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # Binary mode keeps the output as plain UTF-8 bytes on both
            # Python 2 and 3; the context manager closes the handle even if
            # encoding or writing fails.
            with open(out_path, 'wb') as ig_file:
                for item in text:
                    ig_file.write(item.encode('utf-8') + b" ")
def ig_rus(path_load, path_ig,
           path_to_api='/Users/Ivan/PycharmProject/ReadAbility/ApiData/rus/pos/'):
    """Rewrite each Russian text as a stream of kept words and tags.

    Every entry of ``path_load`` (except ``.DS_Store``) is treated as a grade
    directory.  For each ``<name>.txt`` file in it, the matching annotation
    file ``path_to_api + grade + "/" + <name>.xml`` is parsed.  For every
    ``I-annotation`` element, the ``start`` and ``end`` children give a slice
    of ``root[0].text`` and the first child of the ``value`` element gives
    the tag (presumably a part-of-speech tag, judging by the variable name).
    A sliced word that is contained in the collection returned by
    ``get_all_ig(path_load, grades)`` is kept verbatim; otherwise the tag is
    emitted.  The items are written, each followed by a single space, to
    ``path_ig + grade + "/" + filename`` (UTF-8 encoded).

    The output file is written once per input file, after all annotations
    have been collected.  An input without any ``I-annotation`` element
    therefore yields an empty output file.

    Note: an earlier definition of ``ig_rus`` exists in this file; this later
    one is the definition that takes effect at import time.

    Args:
        path_load: Input root.  It is concatenated directly with the grade
            name, so it must end with a path separator.
        path_ig: Output root, concatenated the same way as ``path_load``.
        path_to_api: Root of the XML annotation files, concatenated the same
            way.  Defaults to the previously hard-coded location.

    Raises:
        ValueError: If a filename does not contain ``'.txt'``.
        OSError / IOError: If a directory or file cannot be read or written.
    """
    # Filter instead of list.remove(): remove() raises ValueError when the
    # macOS metadata entry does not exist in the directory.
    grades = [entry for entry in os.listdir(path_load)
              if entry != '.DS_Store']
    ig_words = get_all_ig(path_load, grades)

    for grade in grades:
        path_to_grade = path_load + grade + "/"
        for filename in os.listdir(path_to_grade):
            if filename == '.DS_Store':
                continue

            xml_path = (path_to_api + grade + "/" +
                        filename[:filename.index('.txt')] + '.xml')
            # Bytes are handed to the XML parser so that it applies the
            # document's own encoding on both Python 2 and Python 3.
            with open(xml_path, "rb") as my_file:
                data = my_file.read().replace(b'\n', b'')

            text = []
            root = ElementTree.fromstring(data)

            for word in root.iter('I-annotation'):
                start_word = 0
                end_word = 0
                pos_tag = ''

                for child in word:
                    if child.tag == 'start':
                        start_word = int(child.text)
                    elif child.tag == 'end':
                        end_word = int(child.text)
                    elif child.tag == 'value':
                        pos_tag = child[0].text

                text_word = root[0].text[start_word:end_word]
                text.append(text_word if text_word in ig_words else pos_tag)

            # Writing happens once, after the annotation loop; rewriting the
            # whole file for every annotation was quadratic in their count.
            out_path = path_ig + grade + "/" + filename
            out_dir = os.path.dirname(out_path)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # Binary mode keeps the output as plain UTF-8 bytes on both
            # Python 2 and 3; the context manager closes the handle even if
            # encoding or writing fails.
            with open(out_path, 'wb') as ig_file:
                for item in text:
                    ig_file.write(item.encode('utf-8') + b" ")